[Python-checkins] gh-91146: Reduce allocation size of list from str.split()/rsplit() (gh-95473)
corona10
webhook-mailer at python.org
Sat Jul 30 23:15:02 EDT 2022
https://github.com/python/cpython/commit/50b2261bdac98303087287b24eef96abd45a82f9
commit: 50b2261bdac98303087287b24eef96abd45a82f9
branch: main
author: Dong-hee Na <donghee.na at python.org>
committer: corona10 <donghee.na92 at gmail.com>
date: 2022-07-31T12:14:53+09:00
summary:
gh-91146: Reduce allocation size of list from str.split()/rsplit() (gh-95473)
files:
A Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
M Objects/unicodeobject.c
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
new file mode 100644
index 0000000000000..52568dbedd130
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
@@ -0,0 +1,2 @@
+Reduce allocation size of :class:`list` from :meth:`str.split`
+and :meth:`str.rsplit`. Patch by Dong-hee Na.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ad16ada16fe3b..355d74fe3bbda 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9696,40 +9696,40 @@ split(PyObject *self,
const void *buf1, *buf2;
Py_ssize_t len1, len2;
PyObject* out;
-
- if (maxcount < 0)
- maxcount = PY_SSIZE_T_MAX;
+ len1 = PyUnicode_GET_LENGTH(self);
+ kind1 = PyUnicode_KIND(self);
+ if (maxcount < 0) {
+ maxcount = len1;
+ }
if (substring == NULL)
- switch (PyUnicode_KIND(self)) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self))
return asciilib_split_whitespace(
self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
else
return ucs1lib_split_whitespace(
self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
case PyUnicode_2BYTE_KIND:
return ucs2lib_split_whitespace(
self, PyUnicode_2BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
case PyUnicode_4BYTE_KIND:
return ucs4lib_split_whitespace(
self, PyUnicode_4BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
default:
Py_UNREACHABLE();
}
- kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring);
if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1);
@@ -9783,39 +9783,40 @@ rsplit(PyObject *self,
Py_ssize_t len1, len2;
PyObject* out;
- if (maxcount < 0)
- maxcount = PY_SSIZE_T_MAX;
+ len1 = PyUnicode_GET_LENGTH(self);
+ kind1 = PyUnicode_KIND(self);
+ if (maxcount < 0) {
+ maxcount = len1;
+ }
if (substring == NULL)
- switch (PyUnicode_KIND(self)) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self))
return asciilib_rsplit_whitespace(
self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
else
return ucs1lib_rsplit_whitespace(
self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
case PyUnicode_2BYTE_KIND:
return ucs2lib_rsplit_whitespace(
self, PyUnicode_2BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
case PyUnicode_4BYTE_KIND:
return ucs4lib_rsplit_whitespace(
self, PyUnicode_4BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
default:
Py_UNREACHABLE();
}
- kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring);
if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1);
More information about the Python-checkins
mailing list