[Python-checkins] GH-98363: Have batched() return tuples (GH-100118)

rhettinger webhook-mailer at python.org
Thu Dec 8 16:08:25 EST 2022


https://github.com/python/cpython/commit/35cc0ea736a323119157117d93e5d68d8247e89f
commit: 35cc0ea736a323119157117d93e5d68d8247e89f
branch: main
author: Raymond Hettinger <rhettinger at users.noreply.github.com>
committer: rhettinger <rhettinger at users.noreply.github.com>
date: 2022-12-08T15:08:16-06:00
summary:
GH-98363: Have batched() return tuples (GH-100118)
files:
M Doc/library/itertools.rst
M Lib/test/test_itertools.py
M Modules/clinic/itertoolsmodule.c.h
M Modules/itertoolsmodule.c
diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
index 0b5978505a96..624d2430ac20 100644
--- a/Doc/library/itertools.rst
+++ b/Doc/library/itertools.rst
@@ -52,7 +52,7 @@ Iterator Arguments Results
 Iterator Arguments Results Example
 ============================ ============================ ================================================= =============================================================
 :func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15``
-:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG', n=3) --> ABC DEF G``
+:func:`batched` p, n (p0, p1, ..., p_n-1), ... ``batched('ABCDEFG', n=3) --> ABC DEF G``
 :func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F``
 :func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F``
 :func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F``
@@ -166,11 +166,11 @@ loops that truncate the stream.
 
 .. function:: batched(iterable, n)
 
- Batch data from the *iterable* into lists of length *n*. The last
+ Batch data from the *iterable* into tuples of length *n*. The last
 batch may be shorter than *n*.
 
- Loops over the input iterable and accumulates data into lists up to
- size *n*. The input is consumed lazily, just enough to fill a list.
+ Loops over the input iterable and accumulates data into tuples up to
+ size *n*. The input is consumed lazily, just enough to fill a batch.
 The result is yielded as soon as the batch is full or when the input
 iterable is exhausted:
 
@@ -179,14 +179,14 @@ loops that truncate the stream.
 >>> flattened_data = ['roses', 'red', 'violets', 'blue', 'sugar', 'sweet']
 >>> unflattened = list(batched(flattened_data, 2))
 >>> unflattened
- [['roses', 'red'], ['violets', 'blue'], ['sugar', 'sweet']]
+ [('roses', 'red'), ('violets', 'blue'), ('sugar', 'sweet')]
 
 >>> for batch in batched('ABCDEFG', 3):
 ... print(batch)
 ...
- ['A', 'B', 'C']
- ['D', 'E', 'F']
- ['G']
+ ('A', 'B', 'C')
+ ('D', 'E', 'F')
+ ('G',)
 
 Roughly equivalent to::
 
@@ -195,7 +195,7 @@ loops that truncate the stream.
 if n < 1:
 raise ValueError('n must be at least one')
 it = iter(iterable)
- while (batch := list(islice(it, n))):
+ while (batch := tuple(islice(it, n))):
 yield batch
 
 .. versionadded:: 3.12
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index 5f5bcbc7cfb8..b447b6cbab9c 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -161,11 +161,11 @@ def test_accumulate(self):
 
 def test_batched(self):
 self.assertEqual(list(batched('ABCDEFG', 3)),
- [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']])
+ [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)])
 self.assertEqual(list(batched('ABCDEFG', 2)),
- [['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']])
+ [('A', 'B'), ('C', 'D'), ('E', 'F'), ('G',)])
 self.assertEqual(list(batched('ABCDEFG', 1)),
- [['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']])
+ [('A',), ('B',), ('C',), ('D',), ('E',), ('F',), ('G',)])
 
 with self.assertRaises(TypeError): # Too few arguments
 list(batched('ABCDEFG'))
@@ -188,8 +188,8 @@ def test_batched(self):
 with self.subTest(s=s, n=n, batches=batches):
 # Order is preserved and no data is lost
 self.assertEqual(''.join(chain(*batches)), s)
- # Each batch is an exact list
- self.assertTrue(all(type(batch) is list for batch in batches))
+ # Each batch is an exact tuple
+ self.assertTrue(all(type(batch) is tuple for batch in batches))
 # All but the last batch is of size n
 if batches:
 last_batch = batches.pop()
@@ -1809,12 +1809,12 @@ class TestPurePythonRoughEquivalents(unittest.TestCase):
 
 def test_batched_recipe(self):
 def batched_recipe(iterable, n):
- "Batch data into lists of length n. The last batch may be shorter."
+ "Batch data into tuples of length n. The last batch may be shorter."
 # batched('ABCDEFG', 3) --> ABC DEF G
 if n < 1:
 raise ValueError('n must be at least one')
 it = iter(iterable)
- while (batch := list(islice(it, n))):
+ while (batch := tuple(islice(it, n))):
 yield batch
 
 for iterable, n in product(
@@ -2087,7 +2087,7 @@ def test_accumulate(self):
 
 def test_batched(self):
 s = 'abcde'
- r = [['a', 'b'], ['c', 'd'], ['e']]
+ r = [('a', 'b'), ('c', 'd'), ('e',)]
 n = 2
 for g in (G, I, Ig, L, R):
 with self.subTest(g=g):
diff --git a/Modules/clinic/itertoolsmodule.c.h b/Modules/clinic/itertoolsmodule.c.h
index 17f9ebb24939..287de524e913 100644
--- a/Modules/clinic/itertoolsmodule.c.h
+++ b/Modules/clinic/itertoolsmodule.c.h
@@ -12,19 +12,19 @@ PyDoc_STRVAR(batched_new__doc__,
 "batched(iterable, n)\n"
 "--\n"
 "\n"
-"Batch data into lists of length n. The last batch may be shorter than n.\n"
+"Batch data into tuples of length n. The last batch may be shorter than n.\n"
 "\n"
-"Loops over the input iterable and accumulates data into lists\n"
+"Loops over the input iterable and accumulates data into tuples\n"
 "up to size n. The input is consumed lazily, just enough to\n"
-"fill a list. The result is yielded as soon as a batch is full\n"
+"fill a batch. The result is yielded as soon as a batch is full\n"
 "or when the input iterable is exhausted.\n"
 "\n"
 " >>> for batch in batched(\'ABCDEFG\', 3):\n"
 " ... print(batch)\n"
 " ...\n"
-" [\'A\', \'B\', \'C\']\n"
-" [\'D\', \'E\', \'F\']\n"
-" [\'G\']");
+" (\'A\', \'B\', \'C\')\n"
+" (\'D\', \'E\', \'F\')\n"
+" (\'G\',)");
 
 static PyObject *
 batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n);
@@ -913,4 +913,4 @@ itertools_count(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 exit:
 return return_value;
 }
-/*[clinic end generated code: output=efea8cd1e647bd17 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=0229ebd72962f130 input=a9049054013a1b77]*/
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index d45000788c53..60ec11c32d01 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -56,11 +56,13 @@ static PyTypeObject pairwise_type;
 /* batched object ************************************************************/
 
 /* Note: The built-in zip() function includes a "strict" argument
- that is needed because that function can silently truncate data
- and there is no easy way for a user to detect that condition.
- The same reasoning does not apply to batched() which never drops
- data. Instead, it produces a shorter list which can be handled
- as the user sees fit.
+ that was needed because that function would silently truncate data,
+ and there was no easy way for a user to detect the data loss.
+ The same reasoning does not apply to batched() which never drops data.
+ Instead, batched() produces a shorter tuple which can be handled
+ as the user sees fit. If requested, it would be reasonable to add
+ "fillvalue" support which had demonstrated value in zip_longest().
+ For now, the API is kept simple and clean.
 */
 
 typedef struct {
@@ -74,25 +76,25 @@ typedef struct {
 itertools.batched.__new__ as batched_new
 iterable: object
 n: Py_ssize_t
-Batch data into lists of length n. The last batch may be shorter than n.
+Batch data into tuples of length n. The last batch may be shorter than n.
 
-Loops over the input iterable and accumulates data into lists
+Loops over the input iterable and accumulates data into tuples
 up to size n. The input is consumed lazily, just enough to
-fill a list. The result is yielded as soon as a batch is full
+fill a batch. The result is yielded as soon as a batch is full
 or when the input iterable is exhausted.
 
 >>> for batch in batched('ABCDEFG', 3):
 ... print(batch)
 ...
- ['A', 'B', 'C']
- ['D', 'E', 'F']
- ['G']
+ ('A', 'B', 'C')
+ ('D', 'E', 'F')
+ ('G',)
 
 [clinic start generated code]*/
 
 static PyObject *
 batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n)
-/*[clinic end generated code: output=7ebc954d655371b6 input=f28fd12cb52365f0]*/
+/*[clinic end generated code: output=7ebc954d655371b6 input=ffd70726927c5129]*/
 {
 PyObject *it;
 batchedobject *bo;
@@ -150,12 +152,12 @@ batched_next(batchedobject *bo)
 if (it == NULL) {
 return NULL;
 }
- result = PyList_New(n);
+ result = PyTuple_New(n);
 if (result == NULL) {
 return NULL;
 }
 iternextfunc iternext = *Py_TYPE(it)->tp_iternext;
- PyObject **items = _PyList_ITEMS(result);
+ PyObject **items = _PyTuple_ITEMS(result);
 for (i=0 ; i < n ; i++) {
 item = iternext(it);
 if (item == NULL) {


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /