flatten.py
from collections import Iterable, Mapping
from operator import methodcaller
from stack import Stack
class flatten(object):
"""
Iterator that walk through nested iterable.
If the initial item is not iterable, only itself will be yielded.
Args:
item (Object): Any object.
depth (Optional[int]): Maximum nesting layer.
mapping_iter (str): the method to be called when handling mapping
>>> from collections import OrderedDict
>>> kwargs = OrderedDict()
>>> kwargs['file'] = 'test.txt'
>>> kwargs['data'] = [[0, 1], [-1, 0]]
>>> tuple(flatten(kwargs))
('test.txt', 0, 1, -1, 0)
>>> tuple(flatten('apple'))
('apple',)
"""
def __init__(self, item, depth=128, map_iter='values'):
self.map_iter = methodcaller(map_iter)
done, item = self.expand(item)
if done:
item = iter((item,))
self.stack = Stack([item], depth)
def __iter__(self):
return self
def __next__(self):
while self.stack:
for item in self.stack[-1]:
done, item = self.expand(item)
if done:
return item
else:
self.stack.append(item)
break
else:
self.stack.pop()
raise StopIteration
def expand(self, item):
if isinstance(item, str):
return True, item
elif isinstance(item, Mapping):
return False, iter(self.map_iter(item))
elif isinstance(item, Iterable):
return False, iter(item)
else:
return True, item
class named_flatten(flatten):
"""
Iterator that walk through nested iterable and create tuple key.
For non Mapping iterable, the key are generated by enumerate.
If the initial item is not iterable, only itself will be yielded.
Args:
item (Object): Any object.
depth (Optional[int]): Maximum nesting layer.
>>> from collections import OrderedDict
>>> kwargs = OrderedDict()
>>> kwargs['file'] = 'test.txt'
>>> kwargs['data'] = [[0, 1], [-1, 0]]
>>> result = tuple(named_flatten(kwargs))
>>> result[0]
(('file',), 'test.txt')
>>> result[1]
(('data', 0, 0), 0)
>>> tuple(named_flatten('apple'))
(((), 'apple'),)
"""
def __init__(self, item, depth=128):
super().__init__(((), item), depth, map_iter)
def expand(self, item):
key, value = item
if isinstance(value, str):
return True, item
elif isinstance(value, Mapping):
it = value.items()
elif isinstance(value, Iterable):
it = enumerate(value)
else:
return True, item
return False, ((key+(k,), v) for k, v in it)
if __name__ == '__main__':
import doctest
doctest.testmod()
stack.py
from collections import deque
class FullStackError(MemoryError):
pass
class Stack(deque):
@property
def full(self):
return 0 == (self.maxlen - len(self) if self.maxlen else self.maxlen)
def append(self, x):
if self.full:
raise FullStackError
super().append(x)
def appendleft(self, x):
if self.full:
raise FullStackError
super().appendleft(x)
def insert(self, i, x):
if self.full:
raise FullStackError
super().insert(i, x)
def extend(self, iterable):
for x in iterable:
self.append(x)
def extendleft(self, iterable):
for x in iterable:
self.appendleft(x)
def __str__(self):
return 'Stack' + super().__str__()[5:]
def __repr__(self):
return 'Stack' + super().__repr__()[5:]
2 Answers 2
Conceptually, this is a recursion problem. You've unrolled the recursion into iteration to support the __iter__()
method. However, this is precisely the kind of problem that generators can solve trivially! (Here, I've used yield from
, which is a feature introduced in Python 3.3.)
from collections import Iterable, Mapping
from operator import methodcaller
def flatten(it, map_iter='values', max_depth=128):
if max_depth < 0:
raise RecursionError('maximum recursion depth exceded in flatten')
elif isinstance(it, str):
yield it
elif isinstance(it, Mapping):
for item in methodcaller(map_iter)(it):
yield from flatten(item, map_iter=map_iter, max_depth=max_depth-1)
elif isinstance(it, Iterable):
for item in it:
yield from flatten(item, map_iter=map_iter, max_depth=max_depth-1)
else:
yield it
I've named the parameter it
, which is a triple entendre that can be interpreted as "iterable", "item", or just the third-person singular impersonal pronoun.
Instead of FullStackError
, which leaks information about how your iterator is implemented, just use the built-in RecursionError
. Personally, I'd choose to scrap the depth limit altogether, since the Python interpreter's stack will have its own natural recursion limit.
-
\$\begingroup\$ Thanks for your simpler solution! I heard about the rumer that python is not the best language to use recursion so I try to unroll it. The recursion limit was an attempt to handle recursive containers; Is it better to let python handle that cases itself? \$\endgroup\$user3136376– user31363762016年03月12日 00:46:49 +00:00Commented Mar 12, 2016 at 0:46
-
\$\begingroup\$ In languages that support tail call optimization, you can write code that recurses many times without overflowing the stack. Python, like Java, C, and many other languages, is not tail-recursive. Still, I wouldn't bother putting an artificial recursion depth limit on this function. In Python, as in Java, it is possible to overflow the stack while attempting to flatten certain data structures recursively. However, you would likely hit that limit only if the data structure you are traversing contains a loop (i.e. it's not a tree). \$\endgroup\$200_success– 200_success2016年03月12日 02:37:48 +00:00Commented Mar 12, 2016 at 2:37
-
\$\begingroup\$ For a version that's compatible with python < 3.3: gist.github.com/nikolas/911fdb907f556f506f35a01e3a31aa8e \$\endgroup\$nnyby– nnyby2019年07月29日 16:41:48 +00:00Commented Jul 29, 2019 at 16:41
Let's do the stack first.
MemoryError
is absolutely not the base class that should be used for this exception,
because the program didn't run out of memory. It's just the stack that
is full. I'd only inherit from Exception
for this.
The full
is a bit too complicated for what it does. Since not having
a maxlen
means that full
will always return False
a simple if
would be a bit more straightforward:
@property
def full(self):
if not self.maxlen:
return False
return (self.maxlen - len(self)) == 0
Otherwise looks fine, even if I'm not super keen on the [5:]
in the
formatting functions, but I guess it does the job.
Now flatten
looks good, the only thing I don't understand is the
depth
argument - is that just used as a safety measure?
-
\$\begingroup\$ Thank for your review! For the depth argument, It is an attempt to handle recursive list or infinitely nested iterator. \$\endgroup\$user3136376– user31363762016年03月12日 02:13:16 +00:00Commented Mar 12, 2016 at 2:13