Doubly Linked List in Python

Question 1

I implemented a doubly linked list in Python. Please tell me what I can do to improve performance, what methods I should add, etc. I gave it the best Big O time perfomance/complexity I could. So, here it is:

class DoublyLinkedList():
 """
 A basic class implementing a doubly linked list.
 DoublyLinkedList() - new empty linked list
 DoublyLinkedList(iterable) - new linked list with the items of the iterable:
 head - iterable[0] tail - iterable[-1]
 """
 class _Node():
 """A doubly linked list node class."""
 def __init__(self, value):
 """Initialize default values."""
 self._value = value
 self._next = None
 self._prev = None
 def __init__(self, seq=()):
 """Initialize default values."""
 self._head = None
 self._tail = None
 self._size = 0 # set default values
 self.extend(seq) # copy iterables values
 def __iter__(self):
 """Implement iter(self)."""
 node = self._head
 while node:
 yield node._value
 node = node._next
 def __len__(self):
 """Implement len(self). Return the number of items in list."""
 return self._size
 def __str__(self):
 """Define string casting for the list."""
 return 'None <= ' + ' <=> '.join(map(str, self)) + ' => None'
 def __repr__(self):
 """Return repr(self)."""
 return self.__str__()
 def __contains__(self, item):
 """Implement 'in' access: if item in."""
 for i in self:
 if i == item:
 return True
 return False
 def __eq__(self, other):
 """Implement comparison: a == b."""
 if type(other) is not type(self): # check if other is dll
 return False
 if len(self) != len(other):
 return False
 for i, j in zip(self, other):
 if i != j:
 return False
 return True
 def __getitem__(self, index):
 """Implement indexing access: a[b]."""
 # change index if negative
 index = self._size + index if index < 0 else index
 if 0 <= index < self._size:
 for i, item in enumerate(self):
 if i == index:
 return item
 else:
 raise IndexError('list index out of range')
 def __setitem__(self, index, item):
 """Implement indexed assignment."""
 # change index if negative
 index = self._size + index if index < 0 else index
 if 0 <= index < self._size:
 i = 0
 node = self._head
 while i < index:
 node = node._next
 i += 1
 node._value = item
 else:
 raise IndexError('list assignment index out of range')
 def __delitem__(self, index):
 """Implement indexed deletion."""
 # change index if negative
 if type(index) is not int:
 raise TypeError('list index must be an integer')
 index = self._size + index if index < 0 else index
 if 0 < index < self._size - 1:
 i = 0
 node = self._head
 while i < index:
 node = node._next
 i += 1
 node._prev._next = node._next
 node._next._prev = node._prev
 self._size -= 1
 elif index == 0 and self._head is not None: # case for head
 self._head = self._head._next
 self._head._prev = None
 self._size -= 1
 elif index == self._size - 1 and self._head is not None:
 self._tail = self._tail._prev
 self._tail._next = None
 self._size -= 1
 else:
 raise IndexError('list index out of range')
 def insertStart(self, item):
 """Insert an item to the _head of the list."""
 new_node = self._Node(item)
 if not self._head: # or if not self._tail
 self._head = new_node
 self._tail = new_node
 else:
 new_node._next = self._head
 self._head._prev = new_node
 self._head = new_node
 self._size += 1
 def insertEnd(self, item):
 """Insert an item at the _tail of the list."""
 new_node = self._Node(item)
 if not self._tail: # or if not self._head
 self._tail = new_node
 self._head = new_node
 else:
 new_node._prev = self._tail
 self._tail._next = new_node
 self._tail = new_node
 self._size += 1
 def insert(self, index, item):
 """Insert an item before the specified index."""
 t = type(index)
 if t is not int:
 raise TypeError('{} cannot be interpreted as an integer'.format(t))
 else:
 # change index if negative
 index = self._size + index if index < 0 else index
 if index > self._size - 1: # check for special cases
 self.insertEnd(item)
 elif index <= 0:
 self.insertStart(item)
 else: # iterate through and insert item
 i = 0
 node = self._head
 while i < index - 1:
 node = node._next
 i += 1
 new_node = self._Node(item)
 new_node._next = node._next
 new_node._prev = node
 node._next = new_node
 new_node._next._prev = new_node
 self._size += 1
 def extend(self, seq=()):
 """Extend list by appending elements from the iterable."""
 for i in seq:
 self.insertEnd(i)
 def remove(self, item):
 """
 Remove the first occurence of the value(default _tail).
 Raises a ValueError if the is not present.
 Raises an IndexError if the list is empty.
 """
 if not self._head:
 raise IndexError("remove from an empty list")
 else:
 if self._head._value == item: # case for head
 self._head = self._head._next
 self._head._prev = None
 elif self._tail._value == item: # case for tail
 self._tail = self._tail._prev
 self._tail._next = None
 else:
 node = self._head
 try:
 while node._value != item:
 node = node._next
 node._prev._next = node._next
 node._next._prev = node._prev
 except AttributeError: # mute the original error
 raise ValueError('value not present in list') from None
 self._size -= 1
 def pop(self, index=-1):
 """
 Remove and return item at specified index (default last).
 Raises IndexError if list is empty or index is out of range.
 """
 if self._size == 0: # check if list is empty
 raise IndexError("pop from an empty list")
 t = type(index)
 if t is not int: # check if index is integer
 raise TypeError('{} cannot be interpreted as an integer'.format(t))
 item = self[index] # save the item to return
 del self[index]
 return item
 def index(self, item):
 """Return index of first occurence of specified item. -1 if absent."""
 for index, el in enumerate(self):
 if el == item:
 return index
 return -1
 def count(self, item):
 """Return number of occurrences of item."""
 count = 0
 for i in self:
 if i == item:
 count += 1
 return count
 def clear(self):
 """Remove all the items from the list."""
 self._head = None
 self._tail = None
 self._size = 0
 def reverse(self):
 """Reverse list in place."""
 tmp = None
 curr = self._head
 while curr:
 tmp = curr._prev
 curr._prev = curr._next
 curr._next = tmp
 curr = curr._prev
 if tmp:
 self._head = tmp._prev
 def sort(self):
 """Sort list in place."""
 self._head = self._merge_sort(self._head)
 def _merge(self, left, right): # merge two lists
 t_head = self._Node(None)
 curr = t_head
 while left and right:
 if left._value < right._value:
 curr._next = left
 left = left._next
 else:
 curr._next = right
 right = right._next
 curr = curr._next
 if left is None:
 curr._next = right
 if right is None:
 curr._next = left
 return t_head._next
 def _split(self, lst): # split a list
 if lst is None or lst._next is None:
 left = lst
 right = None
 return left, right
 else:
 mid = lst
 fast = lst._next
 while fast is not None:
 fast = fast._next
 if fast is not None:
 fast = fast._next
 mid = mid._next
 left = lst
 right = mid._next
 mid._next = None
 return left, right
 def _merge_sort(self, t_head): # merge sort
 if t_head is None:
 return t_head
 if t_head._next is None:
 return t_head
 left, right = self._split(t_head)
 left = self._merge_sort(left)
 right = self._merge_sort(right)
 return self._merge(left, right)
if __name__ == '__main__':
 dll = DoublyLinkedList([2, 4, 1, 8, 5, 3])
 print(dll)
 dll.insertEnd(4)
 dll.insertStart(0)
 dll.sort()
 dll.insert(-11, 'start')
 print(dll)
 print(dll.pop())
 print(dll.pop(2))
 dll.remove(4)
 dll.extend('someiterable')
 dll.index(8)
 print(dll.count(4))
 print(dll)

Question 2

__getitem__, __setitem__, and __delitem__ should share index-validation and repair code. In __delitem__ you check the type of index as well as its algebraic sign (whether it's negative). In __getitem__ and __setitem__, you check only the algebraic sign; did you intend to not check the type of index in those two functions? If so, you should comment on why. If not, then you should write something like
```
def __validate_and_repair_index(self, index):
 if type(index) is not int:
 raise TypeError('list index must be an integer')
 # count from the end if "index" is negative
 index = self._size + index if index < 0 else index
 return index
```
and call it from all three of __getitem__, __setitem__, and __delitem__, and maybe some other places, too.

The following block in __delitem__ seems to have a bug (testing _head instead of _tail)

elif index == self._size - 1 and self._head is not None:
 self._tail = self._tail._prev
 self._tail._next = None
 self._size -= 1

Many of your docstrings and comments are vapid and should be removed or made informative. For example """Initialize default values""" in __init__ doesn't tell the reader anything, but wastes the reader's time. A docstring in __len(self)__ that says """Implement len(self)""" similarly adds no information and simply wastes your reader's attention and energy. A comment that says change index if negative right above code that does nothing other than change index if negative is not an informative comment. A better one would be count from the end if index is negative because that's the intent of change index if negative: that's why you're changing index if negative. A comment like if t is not int: # check if index is integer is unnecessary: it simply re-iterates the plain meaning of the code in other words. If you were writing a story, you would not write "I went to the store this morning; this morning, to the store I went." So why would you write comments like that in code that is already crystal clear and can't have any intent other than the obvious one? There are more cases in the code. Some comments can profitably made into asserts. For instance, in insertStart, you might replace # or not self._tail with assert not self._tail on a new line.
The fields of _Node should not be named _value, _prev, and _next because the underscore prefix conventionally connotes "protected," meaning accessible only in subclasses. Various linters and IDEs (like PyCharm) will flag every access of those fields because _Node is used directly in the rest of the code, not through subclasses.
Is there a bug in __contains__? Should i == item be i.value == item?. Likewise, should __getitem__ return the Node object or the value of the Node object?
I'm allowed to insert math.nan and cmath.nanj, but they will fail equality tests such as that in __contains__. Fixing this requires some design. Should nans be disallowed? How? Silently rejected? Ignored? Raise a ValueError? With or without a message?

As a general comment, if you write tests along with your code before going to code review, you will have many fewer issues in the review. I highly recommend hypothesis for Python, which runs on pytest. Here, for instance, is a strategy for hypothesis that generated many examples that broke much of the original code:

from typing import Union
import hypothesis.strategies as st
Atom = Union[str, float, bool, int, complex]
PayloadStrategy = st.deferred(
 lambda: st.from_type(Atom) |
 st.lists(PayloadStrategy) |
 st.tuples(PayloadStrategy) |
 st.iterables(PayloadStrategy) |
 # st.sets(PayloadStrategy) | # TODO: why doesn't this work?
 st.dictionaries(
 keys=st.from_type(str),
 values=PayloadStrategy))

Question 3

Instead of passing an iterable to the constructor, you could use *values as the argument:

 def __init__(self, *values):
 # ... 
 self.extend(values)

This will allow you to use:

dll = DoublyLinkedList(2, 4, 1, 8, 5, 3)

instead of

dll = DoublyLinkedList([2, 4, 1, 8, 5, 3])

You are relying on __iter__(self) (which creates a generator) for the implementation of __contains__, __eq__, __getitem__, index, and count. It would be more efficient to simply loop through your linked list manually, rather than creating a generator, and context switching back and forth between the method code and the generator code.

BUG: remove(self, item) will fail if you remove the only item from list of 1 item.

 # ...
 self._head = self._head._next # self._head = None
 self._head._prev = None # None has no attribute ._prev
 # ...

BUG: Neither reverse(self): nor sort(self): changes self._tail!

__getitem__(self, index):, __setitem__(self, index, item): and insert(self, index, item): do not take advantage of the double-linking. If index is in the last half of the list, iterating from self._tail should be faster.

Question 4

Hey, thanks for your review :) I will definitely change those bugs and make indexing faster while taking advantage of double linking. Now for the constructor argument, I think that passing an iterable is more efficient, for example passing multiple values as an iterable is very easy, just pass them as a list or tuple, but if one has an iterable then it would have them do a for loop, and why, when we can do that for them? As for relying on iterator, I think it's actually good to do so, because why manually iterate through items when you have an iterator. But hey, thanks for those bugs :)

Question 5

Do you also have a bug in __delitem__ in the third branch where you are deleting the tail, but the code says and self._head is not None?

Question 6

As with remove, __delitem__ fails when the list has one item.

Reb.Cabin Reb.Cabin 1611 silver badge4 bronze badges · Answer 1 · 2019-12-06 15:41:40Z

__getitem__, __setitem__, and __delitem__ should share index-validation and repair code. In __delitem__ you check the type of index as well as its algebraic sign (whether it's negative). In __getitem__ and __setitem__, you check only the algebraic sign; did you intend to not check the type of index in those two functions? If so, you should comment on why. If not, then you should write something like
```
def __validate_and_repair_index(self, index):
 if type(index) is not int:
 raise TypeError('list index must be an integer')
 # count from the end if "index" is negative
 index = self._size + index if index < 0 else index
 return index
```
and call it from all three of __getitem__, __setitem__, and __delitem__, and maybe some other places, too.

The following block in __delitem__ seems to have a bug (testing _head instead of _tail)

elif index == self._size - 1 and self._head is not None:
 self._tail = self._tail._prev
 self._tail._next = None
 self._size -= 1

Many of your docstrings and comments are vapid and should be removed or made informative. For example """Initialize default values""" in __init__ doesn't tell the reader anything, but wastes the reader's time. A docstring in __len(self)__ that says """Implement len(self)""" similarly adds no information and simply wastes your reader's attention and energy. A comment that says change index if negative right above code that does nothing other than change index if negative is not an informative comment. A better one would be count from the end if index is negative because that's the intent of change index if negative: that's why you're changing index if negative. A comment like if t is not int: # check if index is integer is unnecessary: it simply re-iterates the plain meaning of the code in other words. If you were writing a story, you would not write "I went to the store this morning; this morning, to the store I went." So why would you write comments like that in code that is already crystal clear and can't have any intent other than the obvious one? There are more cases in the code. Some comments can profitably made into asserts. For instance, in insertStart, you might replace # or not self._tail with assert not self._tail on a new line.
The fields of _Node should not be named _value, _prev, and _next because the underscore prefix conventionally connotes "protected," meaning accessible only in subclasses. Various linters and IDEs (like PyCharm) will flag every access of those fields because _Node is used directly in the rest of the code, not through subclasses.
Is there a bug in __contains__? Should i == item be i.value == item?. Likewise, should __getitem__ return the Node object or the value of the Node object?
I'm allowed to insert math.nan and cmath.nanj, but they will fail equality tests such as that in __contains__. Fixing this requires some design. Should nans be disallowed? How? Silently rejected? Ignored? Raise a ValueError? With or without a message?

As a general comment, if you write tests along with your code before going to code review, you will have many fewer issues in the review. I highly recommend hypothesis for Python, which runs on pytest. Here, for instance, is a strategy for hypothesis that generated many examples that broke much of the original code:

from typing import Union
import hypothesis.strategies as st
Atom = Union[str, float, bool, int, complex]
PayloadStrategy = st.deferred(
 lambda: st.from_type(Atom) |
 st.lists(PayloadStrategy) |
 st.tuples(PayloadStrategy) |
 st.iterables(PayloadStrategy) |
 # st.sets(PayloadStrategy) | # TODO: why doesn't this work?
 st.dictionaries(
 keys=st.from_type(str),
 values=PayloadStrategy))

AJNeufeld AJNeufeld 35.2k5 gold badges41 silver badges103 bronze badges · Answer 2 · 2018-08-30 19:53:21Z

Instead of passing an iterable to the constructor, you could use *values as the argument:

 def __init__(self, *values):
 # ... 
 self.extend(values)

This will allow you to use:

dll = DoublyLinkedList(2, 4, 1, 8, 5, 3)

instead of

dll = DoublyLinkedList([2, 4, 1, 8, 5, 3])

You are relying on __iter__(self) (which creates a generator) for the implementation of __contains__, __eq__, __getitem__, index, and count. It would be more efficient to simply loop through your linked list manually, rather than creating a generator, and context switching back and forth between the method code and the generator code.

BUG: remove(self, item) will fail if you remove the only item from list of 1 item.

 # ...
 self._head = self._head._next # self._head = None
 self._head._prev = None # None has no attribute ._prev
 # ...

BUG: Neither reverse(self): nor sort(self): changes self._tail!

__getitem__(self, index):, __setitem__(self, index, item): and insert(self, index, item): do not take advantage of the double-linking. If index is in the last half of the list, iterating from self._tail should be faster.

Hey, thanks for your review :) I will definitely change those bugs and make indexing faster while taking advantage of double linking. Now for the constructor argument, I think that passing an iterable is more efficient, for example passing multiple values as an iterable is very easy, just pass them as a list or tuple, but if one has an iterable then it would have them do a for loop, and why, when we can do that for them? As for relying on iterator, I think it's actually good to do so, because why manually iterate through items when you have an iterator. But hey, thanks for those bugs :)
Do you also have a bug in __delitem__ in the third branch where you are deleting the tail, but the code says and self._head is not None?
As with remove, __delitem__ fails when the list has one item.

Stack Exchange Network

Doubly Linked List in Python

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Doubly Linked List in Python

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions