I implemented a doubly linked list in Python. Please tell me what I can do to improve performance, what methods I should add, etc. I gave it the best Big O time perfomance/complexity I could. So, here it is:
class DoublyLinkedList():
"""
A basic class implementing a doubly linked list.
DoublyLinkedList() - new empty linked list
DoublyLinkedList(iterable) - new linked list with the items of the iterable:
head - iterable[0] tail - iterable[-1]
"""
class _Node():
"""A doubly linked list node class."""
def __init__(self, value):
"""Initialize default values."""
self._value = value
self._next = None
self._prev = None
def __init__(self, seq=()):
"""Initialize default values."""
self._head = None
self._tail = None
self._size = 0 # set default values
self.extend(seq) # copy iterables values
def __iter__(self):
"""Implement iter(self)."""
node = self._head
while node:
yield node._value
node = node._next
def __len__(self):
"""Implement len(self). Return the number of items in list."""
return self._size
def __str__(self):
"""Define string casting for the list."""
return 'None <= ' + ' <=> '.join(map(str, self)) + ' => None'
def __repr__(self):
"""Return repr(self)."""
return self.__str__()
def __contains__(self, item):
"""Implement 'in' access: if item in."""
for i in self:
if i == item:
return True
return False
def __eq__(self, other):
"""Implement comparison: a == b."""
if type(other) is not type(self): # check if other is dll
return False
if len(self) != len(other):
return False
for i, j in zip(self, other):
if i != j:
return False
return True
def __getitem__(self, index):
"""Implement indexing access: a[b]."""
# change index if negative
index = self._size + index if index < 0 else index
if 0 <= index < self._size:
for i, item in enumerate(self):
if i == index:
return item
else:
raise IndexError('list index out of range')
def __setitem__(self, index, item):
"""Implement indexed assignment."""
# change index if negative
index = self._size + index if index < 0 else index
if 0 <= index < self._size:
i = 0
node = self._head
while i < index:
node = node._next
i += 1
node._value = item
else:
raise IndexError('list assignment index out of range')
def __delitem__(self, index):
"""Implement indexed deletion."""
# change index if negative
if type(index) is not int:
raise TypeError('list index must be an integer')
index = self._size + index if index < 0 else index
if 0 < index < self._size - 1:
i = 0
node = self._head
while i < index:
node = node._next
i += 1
node._prev._next = node._next
node._next._prev = node._prev
self._size -= 1
elif index == 0 and self._head is not None: # case for head
self._head = self._head._next
self._head._prev = None
self._size -= 1
elif index == self._size - 1 and self._head is not None:
self._tail = self._tail._prev
self._tail._next = None
self._size -= 1
else:
raise IndexError('list index out of range')
def insertStart(self, item):
"""Insert an item to the _head of the list."""
new_node = self._Node(item)
if not self._head: # or if not self._tail
self._head = new_node
self._tail = new_node
else:
new_node._next = self._head
self._head._prev = new_node
self._head = new_node
self._size += 1
def insertEnd(self, item):
"""Insert an item at the _tail of the list."""
new_node = self._Node(item)
if not self._tail: # or if not self._head
self._tail = new_node
self._head = new_node
else:
new_node._prev = self._tail
self._tail._next = new_node
self._tail = new_node
self._size += 1
def insert(self, index, item):
"""Insert an item before the specified index."""
t = type(index)
if t is not int:
raise TypeError('{} cannot be interpreted as an integer'.format(t))
else:
# change index if negative
index = self._size + index if index < 0 else index
if index > self._size - 1: # check for special cases
self.insertEnd(item)
elif index <= 0:
self.insertStart(item)
else: # iterate through and insert item
i = 0
node = self._head
while i < index - 1:
node = node._next
i += 1
new_node = self._Node(item)
new_node._next = node._next
new_node._prev = node
node._next = new_node
new_node._next._prev = new_node
self._size += 1
def extend(self, seq=()):
"""Extend list by appending elements from the iterable."""
for i in seq:
self.insertEnd(i)
def remove(self, item):
"""
Remove the first occurence of the value(default _tail).
Raises a ValueError if the is not present.
Raises an IndexError if the list is empty.
"""
if not self._head:
raise IndexError("remove from an empty list")
else:
if self._head._value == item: # case for head
self._head = self._head._next
self._head._prev = None
elif self._tail._value == item: # case for tail
self._tail = self._tail._prev
self._tail._next = None
else:
node = self._head
try:
while node._value != item:
node = node._next
node._prev._next = node._next
node._next._prev = node._prev
except AttributeError: # mute the original error
raise ValueError('value not present in list') from None
self._size -= 1
def pop(self, index=-1):
"""
Remove and return item at specified index (default last).
Raises IndexError if list is empty or index is out of range.
"""
if self._size == 0: # check if list is empty
raise IndexError("pop from an empty list")
t = type(index)
if t is not int: # check if index is integer
raise TypeError('{} cannot be interpreted as an integer'.format(t))
item = self[index] # save the item to return
del self[index]
return item
def index(self, item):
"""Return index of first occurence of specified item. -1 if absent."""
for index, el in enumerate(self):
if el == item:
return index
return -1
def count(self, item):
"""Return number of occurrences of item."""
count = 0
for i in self:
if i == item:
count += 1
return count
def clear(self):
"""Remove all the items from the list."""
self._head = None
self._tail = None
self._size = 0
def reverse(self):
"""Reverse list in place."""
tmp = None
curr = self._head
while curr:
tmp = curr._prev
curr._prev = curr._next
curr._next = tmp
curr = curr._prev
if tmp:
self._head = tmp._prev
def sort(self):
"""Sort list in place."""
self._head = self._merge_sort(self._head)
def _merge(self, left, right): # merge two lists
t_head = self._Node(None)
curr = t_head
while left and right:
if left._value < right._value:
curr._next = left
left = left._next
else:
curr._next = right
right = right._next
curr = curr._next
if left is None:
curr._next = right
if right is None:
curr._next = left
return t_head._next
def _split(self, lst): # split a list
if lst is None or lst._next is None:
left = lst
right = None
return left, right
else:
mid = lst
fast = lst._next
while fast is not None:
fast = fast._next
if fast is not None:
fast = fast._next
mid = mid._next
left = lst
right = mid._next
mid._next = None
return left, right
def _merge_sort(self, t_head): # merge sort
if t_head is None:
return t_head
if t_head._next is None:
return t_head
left, right = self._split(t_head)
left = self._merge_sort(left)
right = self._merge_sort(right)
return self._merge(left, right)
if __name__ == '__main__':
dll = DoublyLinkedList([2, 4, 1, 8, 5, 3])
print(dll)
dll.insertEnd(4)
dll.insertStart(0)
dll.sort()
dll.insert(-11, 'start')
print(dll)
print(dll.pop())
print(dll.pop(2))
dll.remove(4)
dll.extend('someiterable')
dll.index(8)
print(dll.count(4))
print(dll)
2 Answers 2
__getitem__
,__setitem__
, and__delitem__
should share index-validation and repair code. In__delitem__
you check the type ofindex
as well as its algebraic sign (whether it's negative). In__getitem__
and__setitem__
, you check only the algebraic sign; did you intend to not check the type ofindex
in those two functions? If so, you should comment on why. If not, then you should write something likedef __validate_and_repair_index(self, index): if type(index) is not int: raise TypeError('list index must be an integer') # count from the end if "index" is negative index = self._size + index if index < 0 else index return index
and call it from all three of
__getitem__
,__setitem__
, and__delitem__
, and maybe some other places, too.The following block in
__delitem__
seems to have a bug (testing_head
instead of_tail
)elif index == self._size - 1 and self._head is not None: self._tail = self._tail._prev self._tail._next = None self._size -= 1
Many of your docstrings and comments are vapid and should be removed or made informative. For example
"""Initialize default values"""
in__init__
doesn't tell the reader anything, but wastes the reader's time. A docstring in__len(self)__
that says"""Implement len(self)"""
similarly adds no information and simply wastes your reader's attention and energy. A comment that sayschange index if negative
right above code that does nothing other than change index if negative is not an informative comment. A better one would becount from the end if index is negative
because that's the intent ofchange index if negative
: that's why you're changing index if negative. A comment likeif t is not int: # check if index is integer
is unnecessary: it simply re-iterates the plain meaning of the code in other words. If you were writing a story, you would not write "I went to the store this morning; this morning, to the store I went." So why would you write comments like that in code that is already crystal clear and can't have any intent other than the obvious one? There are more cases in the code. Some comments can profitably made intoasserts
. For instance, ininsertStart
, you might replace# or not self._tail
withassert not self._tail
on a new line.The fields of
_Node
should not be named_value
,_prev
, and_next
because the underscore prefix conventionally connotes "protected," meaning accessible only in subclasses. Various linters and IDEs (like PyCharm) will flag every access of those fields because_Node
is used directly in the rest of the code, not through subclasses.Is there a bug in
__contains__
? Shouldi == item
bei.value == item
?. Likewise, should__getitem__
return theNode
object or the value of theNode
object?I'm allowed to insert
math.nan
andcmath.nanj
, but they will fail equality tests such as that in__contains__
. Fixing this requires some design. Should nans be disallowed? How? Silently rejected? Ignored? Raise aValueError
? With or without a message?
As a general comment, if you write tests along with your code before going to code review, you will have many fewer issues in the review. I highly recommend hypothesis for Python, which runs on pytest. Here, for instance, is a strategy
for hypothesis
that generated many examples that broke much of the original code:
from typing import Union
import hypothesis.strategies as st
Atom = Union[str, float, bool, int, complex]
PayloadStrategy = st.deferred(
lambda: st.from_type(Atom) |
st.lists(PayloadStrategy) |
st.tuples(PayloadStrategy) |
st.iterables(PayloadStrategy) |
# st.sets(PayloadStrategy) | # TODO: why doesn't this work?
st.dictionaries(
keys=st.from_type(str),
values=PayloadStrategy))
Instead of passing an iterable to the constructor, you could use *values
as the argument:
def __init__(self, *values):
# ...
self.extend(values)
This will allow you to use:
dll = DoublyLinkedList(2, 4, 1, 8, 5, 3)
instead of
dll = DoublyLinkedList([2, 4, 1, 8, 5, 3])
You are relying on __iter__(self)
(which creates a generator) for the implementation of __contains__
, __eq__
, __getitem__
, index
, and count
. It would be more efficient to simply loop through your linked list manually, rather than creating a generator, and context switching back and forth between the method code and the generator code.
BUG: remove(self, item)
will fail if you remove the only item from list of 1 item.
# ...
self._head = self._head._next # self._head = None
self._head._prev = None # None has no attribute ._prev
# ...
BUG: Neither reverse(self):
nor sort(self):
changes self._tail
!
__getitem__(self, index):
, __setitem__(self, index, item):
and insert(self, index, item):
do not take advantage of the double-linking. If index
is in the last half of the list, iterating from self._tail
should be faster.
-
\$\begingroup\$ Hey, thanks for your review :) I will definitely change those bugs and make indexing faster while taking advantage of double linking. Now for the constructor argument, I think that passing an iterable is more efficient, for example passing multiple values as an iterable is very easy, just pass them as a list or tuple, but if one has an iterable then it would have them do a for loop, and why, when we can do that for them? As for relying on iterator, I think it's actually good to do so, because why manually iterate through items when you have an iterator. But hey, thanks for those bugs :) \$\endgroup\$Nick– Nick2018年08月30日 20:38:56 +00:00Commented Aug 30, 2018 at 20:38
-
\$\begingroup\$ Do you also have a bug in
__delitem__
in the third branch where you are deleting the tail, but the code saysand self._head is not None
? \$\endgroup\$Reb.Cabin– Reb.Cabin2019年12月06日 15:11:05 +00:00Commented Dec 6, 2019 at 15:11 -
\$\begingroup\$ As with
remove
,__delitem__
fails when the list has one item. \$\endgroup\$Reb.Cabin– Reb.Cabin2019年12月06日 18:47:13 +00:00Commented Dec 6, 2019 at 18:47
Explore related questions
See similar questions with these tags.