[Python-checkins] gh-91146: Reduce allocation size of list from str.split()/rsplit() (gh-95473)

corona10 webhook-mailer at python.org
Sat Jul 30 23:15:02 EDT 2022


https://github.com/python/cpython/commit/50b2261bdac98303087287b24eef96abd45a82f9
commit: 50b2261bdac98303087287b24eef96abd45a82f9
branch: main
author: Dong-hee Na <donghee.na at python.org>
committer: corona10 <donghee.na92 at gmail.com>
date: 2022-07-31T12:14:53+09:00
summary:
gh-91146: Reduce allocation size of list from str.split()/rsplit() (gh-95473)
files:
A Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
M Objects/unicodeobject.c
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
new file mode 100644
index 0000000000000..52568dbedd130
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst	
@@ -0,0 +1,2 @@
+Reduce allocation size of :class:`list` from :meth:`str.split`
+and :meth:`str.rsplit`. Patch by Dong-hee Na.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ad16ada16fe3b..355d74fe3bbda 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9696,40 +9696,40 @@ split(PyObject *self,
 const void *buf1, *buf2;
 Py_ssize_t len1, len2;
 PyObject* out;
-
- if (maxcount < 0)
- maxcount = PY_SSIZE_T_MAX;
+ len1 = PyUnicode_GET_LENGTH(self);
+ kind1 = PyUnicode_KIND(self);
+ if (maxcount < 0) {
+ maxcount = len1;
+ }
 
 if (substring == NULL)
- switch (PyUnicode_KIND(self)) {
+ switch (kind1) {
 case PyUnicode_1BYTE_KIND:
 if (PyUnicode_IS_ASCII(self))
 return asciilib_split_whitespace(
 self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
 );
 else
 return ucs1lib_split_whitespace(
 self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
 );
 case PyUnicode_2BYTE_KIND:
 return ucs2lib_split_whitespace(
 self, PyUnicode_2BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
 );
 case PyUnicode_4BYTE_KIND:
 return ucs4lib_split_whitespace(
 self, PyUnicode_4BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
 );
 default:
 Py_UNREACHABLE();
 }
 
- kind1 = PyUnicode_KIND(self);
 kind2 = PyUnicode_KIND(substring);
- len1 = PyUnicode_GET_LENGTH(self);
 len2 = PyUnicode_GET_LENGTH(substring);
 if (kind1 < kind2 || len1 < len2) {
 out = PyList_New(1);
@@ -9783,39 +9783,40 @@ rsplit(PyObject *self,
 Py_ssize_t len1, len2;
 PyObject* out;
 
- if (maxcount < 0)
- maxcount = PY_SSIZE_T_MAX;
+ len1 = PyUnicode_GET_LENGTH(self);
+ kind1 = PyUnicode_KIND(self);
+ if (maxcount < 0) {
+ maxcount = len1;
+ }
 
 if (substring == NULL)
- switch (PyUnicode_KIND(self)) {
+ switch (kind1) {
 case PyUnicode_1BYTE_KIND:
 if (PyUnicode_IS_ASCII(self))
 return asciilib_rsplit_whitespace(
 self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
 );
 else
 return ucs1lib_rsplit_whitespace(
 self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
 );
 case PyUnicode_2BYTE_KIND:
 return ucs2lib_rsplit_whitespace(
 self, PyUnicode_2BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
 );
 case PyUnicode_4BYTE_KIND:
 return ucs4lib_rsplit_whitespace(
 self, PyUnicode_4BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
 );
 default:
 Py_UNREACHABLE();
 }
 
- kind1 = PyUnicode_KIND(self);
 kind2 = PyUnicode_KIND(substring);
- len1 = PyUnicode_GET_LENGTH(self);
 len2 = PyUnicode_GET_LENGTH(substring);
 if (kind1 < kind2 || len1 < len2) {
 out = PyList_New(1);


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /