def merge_arrays(list1, list2):
len_list1 = len(list1); len_list2 = len(list2)
merge_len = len_list1 + len_list2
merge_list = []
l1_ptr = 0
l2_ptr = 0
# import pdb; pdb.set_trace()
while(l1_ptr <= len_list1-1 and l2_ptr <= len_list2-1):
if (list1[l1_ptr] <= list2[l2_ptr]):
merge_list.append(list1[l1_ptr])
l1_ptr += 1
elif (list1[l1_ptr] > list2[l2_ptr]):
merge_list.append(list2[l2_ptr])
l2_ptr += 1
if l1_ptr > len_list1-1: #list1 exhausted
for item in list2[l2_ptr:]:
merge_list.append(item)
else:
for item in list1[l1_ptr:]:
merge_list.append(item)
return merge_list
I am trying to merge sorted arrays in python. How can I improve this? Honestly it looks like I've written this in C and not in Python
2 Answers 2
various
merge_len
is unused- the extra parentheses around the simple checks are unnecessary
l1_ptr <= len_list1-1
can be made more clearer asl1_ptr < len_list1
- using the variable name
l1_ptr
to save a few characters while making it harder to guess from the name what it does is not useful
Working with the indices directly is not really pythonic indeed. You can make this more generic, using iter
and next
, and work for all iterables.
typing
add typing information:
import typing
T = typing.TypeVar("T")
def merge_sorted_iterables(
iterable1: typing.Iterable[T], iterable2: typing.Iterable[T]
) -> typing.Iterable[T]:
This is extra explanation for the user of this function (and his IDE).
docstring
Add some explanation on what the method does, expects from the caller, and returns.
def merge_sorted_iterables(
iterable1: typing.Iterable[T], iterable2: typing.Iterable[T]
) -> typing.Iterable[T]:
"""Merge 2 sorted iterables.
The items in the iterables need to be comparable (and support `<=`).
...
"""
iterator
Instead of keeping track of the index you can use iter
and next
. You don't even need to add the items to a list, you can yield
them, so the caller of the method can decide in what way he wants to use this.
done = object()
iterator1 = iter(iterable1)
iterator2 = iter(iterable2)
item1 = next(iterator1, done)
item2 = next(iterator2, done)
while item1 is not done and item2 is not done:
if item1 <= item2:
yield item1
item1 = next(iterator1, done)
else:
yield item2
item2 = next(iterator2, done)
Then all that needs to be done is continue the iterator that is not finished
if item1 is not done:
yield item1
yield from iterator1
if item2 is not done:
yield item2
yield from iterator2
import typing
T = typing.TypeVar("T")
def merge_sorted_iterables(
iterable1: typing.Iterable[T], iterable2: typing.Iterable[T]
) -> typing.Iterable[T]:
"""Merge 2 sorted iterables.
The items in the iterables need to be comparable (and support `<=`).
...
"""
done = object()
iterator1 = iter(iterable1)
iterator2 = iter(iterable2)
item1 = next(iterator1, done)
item2 = next(iterator2, done)
while item1 is not done and item2 is not done:
if item1 <= item2:
yield item1
item1 = next(iterator1, done)
else:
yield item2
item2 = next(iterator2, done)
if item1 is not done:
yield item1
yield from iterator1
if item2 is not done:
yield item2
yield from iterator2
testing
You can test the behaviour, starting with the simplest cases:
import pytest
def test_empty():
expected = []
result = list(merge_sorted_iterables([], []))
assert result == expected
def test_single():
expected = [0, 1, 2]
result = list(merge_sorted_iterables([], range(3)))
assert expected == result
result = list(merge_sorted_iterables(range(3), [],))
assert expected == result
def test_simple():
expected = [0, 1, 2, 3, 4, 5]
result = list(merge_sorted_iterables([0, 1, 2], [3, 4, 5]))
assert result == expected
result = list(merge_sorted_iterables([0, 2, 4], [1, 3, 5]))
assert result == expected
result = list(merge_sorted_iterables([3, 4, 5], [0, 1, 2],))
assert result == expected
def test_string():
expected = list("abcdef")
result = list(merge_sorted_iterables("abc", "def"))
assert result == expected
result = list(merge_sorted_iterables("ace", "bdf"))
assert result == expected
result = list(merge_sorted_iterables("def", "abc",))
assert result == expected
def test_iterable():
expected = [0, 1, 2, 3, 4, 5]
result = list(merge_sorted_iterables(iter([0, 1, 2]), iter([3, 4, 5])))
assert result == expected
result = list(merge_sorted_iterables(iter([0, 2, 4]), iter([1, 3, 5])))
assert result == expected
result = list(merge_sorted_iterables(iter([3, 4, 5]), iter([0, 1, 2]),))
assert result == expected
def test_comparable():
with pytest.raises(TypeError, match="not supported between instances of"):
list(merge_sorted_iterables([0, 1, 2], ["a", "b", "c"]))
descending
Once you have these test in place, you can easily expand the behaviour to also take descending iterables:
import operator
def merge_sorted_iterables(
iterable1: typing.Iterable[T],
iterable2: typing.Iterable[T],
*,
ascending: bool = True,
) -> typing.Iterable[T]:
"""Merge 2 sorted iterables.
The items in the iterables need to be comparable.
...
"""
done = object()
iterator1 = iter(iterable1)
iterator2 = iter(iterable2)
item1 = next(iterator1, done)
item2 = next(iterator2, done)
comparison = operator.le if ascending else operator.ge
while item1 is not done and item2 is not done:
if comparison(item1, item2):
yield item1
item1 = next(iterator1, done)
else:
yield item2
item2 = next(iterator2, done)
if item1 is not done:
yield item1
yield from iterator1
if item2 is not done:
yield item2
yield from iterator2
I added the ascending
keyword as a keyword-only argument to avoid confusion and keep backwards compatibility
One of its tests:
def test_descending():
expected = [5, 4, 3, 2, 1, 0]
result = list(
merge_sorted_iterables([2, 1, 0], [5, 4, 3], ascending=False)
)
assert result == expected
result = list(
merge_sorted_iterables([4, 2, 0], [5, 3, 1], ascending=False)
)
assert result == expected
result = list(
merge_sorted_iterables([5, 4, 3], [2, 1, 0], ascending=False)
)
assert result == expected
-
\$\begingroup\$ wow this is great, thank you so much. you have no idea how much more i have learnt from your explanation and just this small example. thank you for taking the time, truly! \$\endgroup\$r4bb1t– r4bb1t2020年08月31日 12:58:29 +00:00Commented Aug 31, 2020 at 12:58
-
\$\begingroup\$ Thanks for the tests, have been useful :-). Twice you wrote
expected == result
instead ofresult == expected
, though. For consistency I think it would be better if all were the same. \$\endgroup\$superb rain– superb rain2020年08月31日 21:49:44 +00:00Commented Aug 31, 2020 at 21:49 -
\$\begingroup\$ I disagree 100% with the "typing" section. The function is called "merge_sorted_iterables" and takes two arguments "iterable1" and "iterable2". Of course they're iterables, and of course it returns an iterable. You've added zero useful information, and broken the function signature over three lines, and completely counterfeit the beauty of Python's indentation. \$\endgroup\$Adam Barnes– Adam Barnes2020年08月31日 22:57:12 +00:00Commented Aug 31, 2020 at 22:57
-
\$\begingroup\$ In this case the typing information is a bit superfluous. But it helps the IDE, and in other, less trivial cases it can have a lot of value \$\endgroup\$Maarten Fabré– Maarten Fabré2020年09月01日 04:57:47 +00:00Commented Sep 1, 2020 at 4:57
Use a 4-space indent.
Don't repeatedly subtract 1 from the same unchanging value.
Simplify the compare conditional: just use else
.
Take advantage of list.extend().
Drop the wrap-up conditionals: they aren't actually needed. Code like zs.extend(xs[xi:])
will work fine even if xi
exceeds the list
bounds.
Shorten the variable names to lighten the code weight and increase readability. There's no loss of meaning here, because all of the short names are quite conventional and make sense in a generic function like this.
def merge_arrays(xs, ys):
# Setup.
xmax = len(xs) - 1
ymax = len(ys) - 1
xi = 0
yi = 0
zs = []
# Compare and merge.
while xi <= xmax and yi <= ymax:
if xs[xi] <= ys[yi]:
zs.append(xs[xi])
xi += 1
else:
zs.append(ys[yi])
yi += 1
# Merge any remainders and return.
zs.extend(ys[yi:])
zs.extend(xs[xi:])
return zs
Last night I wrote an iterator-base solution but somehow forgot that next()
supports a handy default
argument: the code was awkward and Maarten
Fabré did a nicer
implementation. But if your willing to use
more_itertools.peekable()
you can achieve a simple, readable implementation. Thanks to
superb-rain
for an idea in the comments that helped me simplify further.
from more_itertools import peekable
def merge(xs, ys):
xit = peekable(xs)
yit = peekable(ys)
while xit and yit:
it = xit if xit.peek() <= yit.peek() else yit
yield next(it)
yield from (xit or yit)
-
1\$\begingroup\$ How about this? \$\endgroup\$superb rain– superb rain2020年08月31日 18:46:21 +00:00Commented Aug 31, 2020 at 18:46
-
\$\begingroup\$ @superbrain Cool: good to know a peekable iterator can be checked for exhaustion in a simple
if
-test. I simplified my version even more. \$\endgroup\$FMc– FMc2020年08月31日 19:35:36 +00:00Commented Aug 31, 2020 at 19:35
from heapq import merge
... was this intentionally reinventing the wheel? \$\endgroup\$sorted(list1 + list2)
. \$\endgroup\$list(merge(list1, list2))
. \$\endgroup\$