Python Hash Table and insertion

Question 1

I'm attempting to implement a basic hash table in Python using linked lists. Any tips would be appreciated (including better hash functions). I intend this to handle collisions with separate chaining.

I include my gist for my linked list:

Question: Are there any things I handled incorrectly or could have implemented in a better way?

I also checked my function against the unit-test and make sure it all 11 tests and passes them.

#!python
from linkedlist import LinkedList
class HashTable(object):
 def __init__(self, init_size=8):
 """Initialize this hash table with the given initial size."""
 self.buckets = [LinkedList() for i in range(init_size)]
 self.size = 0 # Number of key-value entries
 def __str__(self):
 """Return a formatted string representation of this hash table."""
 items = ['{!r}: {!r}'.format(key, val) for key, val in self.items()]
 return '{' + ', '.join(items) + '}'
 def __repr__(self):
 """Return a string representation of this hash table."""
 return 'HashTable({!r})'.format(self.items())
 def _bucket_index(self, key):
 """Return the bucket index where the given key would be stored."""
 return hash(key) % len(self.buckets)
 def load_factor(self):
 """Return the load factor, the ratio of number of entries to buckets.
 Best and worst case running time: Theta(n) it always depends on the number
 of items that need to be counted, one by one"""
 return float(self.size) / (len(self.buckets))
 def keys(self):
 """Return a list of all keys in this hash table.
 Best and worst case running time: O(n^2)"""
 # Collect all keys in each of the buckets
 all_keys = []
 for bucket in self.buckets:
 for key, value in bucket.items():
 all_keys.append(key)
 return all_keys
 def values(self):
 """Return a list of all values in this hash table.
 Best and worst case running time: O(n^2)"""
 # Collect all values in each of the buckets
 all_values = []
 for bucket in self.buckets:
 for key, value in bucket.items():
 all_values.append(value)
 return all_values
 def items(self):
 """Return a list of all entries (key-value pairs) in this hash table.
 Best and worst case running time: O(n^2)"""
 # Collect all pairs of key-value entries in each of the buckets
 all_items = []
 for bucket in self.buckets:
 all_items.extend(bucket.items())
 return all_items
 def length(self):
 """Return the number of key-value entries by traversing its buckets.
 Best and worst case running time: O(1)"""
 return self.size
 def contains(self, key):
 """Return True if this hash table contains the given key, or False.
 Best case running time: O(1) near beginning of list of keys
 Worst case running time: O(n) near end of list of keys """
 # Find the bucket the given key belongs in
 index = self._bucket_index(key)
 bucket = self.buckets[index]
 entry = bucket.find(lambda key_value: key_value[0] == key)
 return entry is not None # True or False
 def get(self, key):
 """Return the value associated with the given key, or raise KeyError.
 Best case running time: O(1) if entry is near beginning of list in first bucket
 Worst case running time: O(n) in most cases it will be theta(n) and depend
 on where it is located in the list of items"""
 # Find the bucket the given key belongs in
 index = self._bucket_index(key)
 bucket = self.buckets[index]
 # # Find the entry with the given key in that bucket, if one exists
 entry = bucket.find(lambda key_value: key_value[0] == key)
 # Return the given key's associated value
 if entry is not None:
 assert isinstance(entry, tuple)
 assert len(entry) == 2
 return entry[1]
 else: 
 raise KeyError('Key not found: {}'.format(key))
 def set(self, key, value):
 """Insert or update the given key with its associated value.
 Best case running time: O(1) if it's near beginning of LL
 Worst case running time: O(n) in most cases, depends on position in LL"""
 # Find the bucket the given key belongs in
 index = self._bucket_index(key)
 bucket = self.buckets[index]
 # Find the entry with the given key in that bucket, if one exists
 # Check if an entry with the given key exists in that bucket
 entry = bucket.find(lambda key_value: key_value[0] == key) 
 if entry is not None: # Found
 # In this case, the given key's value is being updated
 # Remove the old key-value entry from the bucket first
 bucket.delete(entry) 
 else:
 # If not found, increase size by one to account for new item
 self.size += 1
 # Insert the new key-value entry into the bucket in either case
 bucket.append((key, value))
 # Check if load_factor exceeds threshold, if it does, resize hashtable
 if self.load_factor() > 0.75:
 self._resize()
 def delete(self, key):
 """Delete the given key and its associated value, or raise KeyError.
 Best case running time: O(1) if near beginning of LL
 Worst case running time: O(n) in most cases, depends on position in LL"""
 # Find the bucket the given key belongs in
 index = self._bucket_index(key)
 bucket = self.buckets[index]
 # Find the entry with the given key in that bucket, if one exists
 entry = bucket.find(lambda key_value: key_value[0] == key)
 if entry is not None: # Found
 # Remove the key-value entry from the bucket
 bucket.delete(entry)
 self.size -= 1
 else: # Not found
 raise KeyError('Key not found: {}'.format(key))
 def _resize(self, new_bucket_count=None):
 """Resize this hash table's buckets and rehash all key-value entries.
 Should be called automatically when load factor exceeds a threshold
 such as 0.75 after an insertion (when set is called with a new key).
 Best and worst case running time: ??? under what conditions? [TODO]
 Best and worst case space usage: ??? what uses this memory? [TODO]"""
 # If unspecified, choose new size dynamically based on current size
 if new_bucket_count is None:
 new_bucket_count = len(self.buckets) * 2 # Double the bucket
 # Option to reduce size if buckets are sparsely filled (low load factor)
 elif new_bucket_count is 0:
 new_bucket_count = len(self.buckets) // 2 # Half the bucket
 # Get a list to temporarily hold all current key-value entries
 old_items = self.items()
 # Create a list of new buckets to be used for rehashing
 self.buckets = [LinkedList() for _ in range(new_bucket_count)]
 # Reset size
 self.size = 0
 # Insert each key-value entry into the new list of buckets,
 for key, value in old_items:
 self.set(key, value)
def test_hash_table():
 ht = HashTable(4)
 print('HashTable: ' + str(ht))
 print('Setting entries:')
 ht.set('I', 1)
 print('set(I, 1): ' + str(ht))
 ht.set('V', 5)
 print('set(V, 5): ' + str(ht))
 print('size: ' + str(ht.size))
 print('length: ' + str(ht.length()))
 print('buckets: ' + str(len(ht.buckets)))
 print('load_factor: ' + str(ht.load_factor()))
 ht.set('X', 10)
 print('set(X, 10): ' + str(ht))
 ht.set('L', 50) # Should trigger resize
 print('set(L, 50): ' + str(ht))
 print('size: ' + str(ht.size))
 print('length: ' + str(ht.length()))
 print('buckets: ' + str(len(ht.buckets)))
 print('load_factor: ' + str(ht.load_factor()))
 print('Getting entries:')
 print('get(I): ' + str(ht.get('I')))
 print('get(V): ' + str(ht.get('V')))
 print('get(X): ' + str(ht.get('X')))
 print('get(L): ' + str(ht.get('L')))
 print('contains(X): ' + str(ht.contains('X')))
 print('contains(Z): ' + str(ht.contains('Z')))
 print('Deleting entries:')
 ht.delete('I')
 print('delete(I): ' + str(ht))
 ht.delete('V')
 print('delete(V): ' + str(ht))
 ht.delete('X')
 print('delete(X): ' + str(ht))
 ht.delete('L')
 print('delete(L): ' + str(ht))
 print('contains(X): ' + str(ht.contains('X')))
 print('size: ' + str(ht.size))
 print('length: ' + str(ht.length()))
 print('buckets: ' + str(len(ht.buckets)))
 print('load_factor: ' + str(ht.load_factor()))
if __name__ == '__main__':
 test_hash_table()
#!python
from hashtable import HashTable
import unittest
# Python 2 and 3 compatibility: unittest module renamed this assertion method
if not hasattr(unittest.TestCase, 'assertCountEqual'):
 unittest.TestCase.assertCountEqual = unittest.TestCase.assertItemsEqual
class HashTableTest(unittest.TestCase):
 def test_init(self):
 ht = HashTable(4)
 assert len(ht.buckets) == 4
 assert ht.length() == 0
 assert ht.size == 0
 def test_keys(self):
 ht = HashTable()
 assert ht.keys() == []
 ht.set('I', 1)
 assert ht.keys() == ['I']
 ht.set('V', 5)
 self.assertCountEqual(ht.keys(), ['I', 'V']) # Ignore item order
 ht.set('X', 10)
 self.assertCountEqual(ht.keys(), ['I', 'V', 'X']) # Ignore item order
 def test_values(self):
 ht = HashTable()
 assert ht.values() == []
 ht.set('I', 1)
 assert ht.values() == [1]
 ht.set('V', 5)
 self.assertCountEqual(ht.values(), [1, 5]) # Ignore item order
 ht.set('X', 10)
 self.assertCountEqual(ht.values(), [1, 5, 10]) # Ignore item order
 def test_items(self):
 ht = HashTable()
 assert ht.items() == []
 ht.set('I', 1)
 assert ht.items() == [('I', 1)]
 ht.set('V', 5)
 self.assertCountEqual(ht.items(), [('I', 1), ('V', 5)])
 ht.set('X', 10)
 self.assertCountEqual(ht.items(), [('I', 1), ('V', 5), ('X', 10)])
 def test_length(self):
 ht = HashTable()
 assert ht.length() == 0
 ht.set('I', 1)
 assert ht.length() == 1
 ht.set('V', 5)
 assert ht.length() == 2
 ht.set('X', 10)
 assert ht.length() == 3
 def test_size(self):
 ht = HashTable()
 assert ht.size == 0
 ht.set('I', 1)
 assert ht.size == 1
 ht.set('V', 5)
 assert ht.size == 2
 ht.set('X', 10)
 assert ht.size == 3
 def test_resize(self):
 ht = HashTable(2) # Set init_size to 2
 assert ht.size == 0
 assert len(ht.buckets) == 2
 assert ht.load_factor() == 0
 ht.set('I', 1)
 assert ht.size == 1
 assert len(ht.buckets) == 2
 assert ht.load_factor() == 0.5
 ht.set('V', 5) # Should trigger resize
 assert ht.size == 2
 assert len(ht.buckets) == 4
 assert ht.load_factor() == 0.5
 ht.set('X', 10)
 assert ht.size == 3
 assert len(ht.buckets) == 4
 assert ht.load_factor() == 0.75
 ht.set('L', 50) # Should trigger resize
 assert ht.size == 4
 assert len(ht.buckets) == 8
 assert ht.load_factor() == 0.5
 def test_contains(self):
 ht = HashTable()
 ht.set('I', 1)
 ht.set('V', 5)
 ht.set('X', 10)
 assert ht.contains('I') is True
 assert ht.contains('V') is True
 assert ht.contains('X') is True
 assert ht.contains('A') is False
 def test_set_and_get(self):
 ht = HashTable()
 ht.set('I', 1)
 ht.set('V', 5)
 ht.set('X', 10)
 assert ht.get('I') == 1
 assert ht.get('V') == 5
 assert ht.get('X') == 10
 assert ht.length() == 3
 assert ht.size == 3
 with self.assertRaises(KeyError):
 ht.get('A') # Key does not exist
 def test_set_twice_and_get(self):
 ht = HashTable()
 ht.set('I', 1)
 ht.set('V', 4)
 ht.set('X', 9)
 assert ht.length() == 3
 assert ht.size == 3
 ht.set('V', 5) # Update value
 ht.set('X', 10) # Update value
 assert ht.get('I') == 1
 assert ht.get('V') == 5
 assert ht.get('X') == 10
 assert ht.length() == 3 # Check length is not overcounting
 assert ht.size == 3 # Check size is not overcounting
 def test_delete(self):
 ht = HashTable()
 ht.set('I', 1)
 ht.set('V', 5)
 ht.set('X', 10)
 assert ht.length() == 3
 assert ht.size == 3
 ht.delete('I')
 ht.delete('X')
 assert ht.length() == 1
 assert ht.size == 1
 with self.assertRaises(KeyError):
 ht.delete('X') # Key no longer exists
 with self.assertRaises(KeyError):
 ht.delete('A') # Key does not exist
if __name__ == '__main__':
 unittest.main()

Question 2

Few high-level ideas and minor nitpicks:

have you considered json.dumps() to use for the HashTable's string representation

the keys() and values() methods can make use of a list comprehension:

def keys(self):
 """Return a list of all keys in this hash table.
 Best and worst case running time: O(n^2)"""
 # Collect all keys in each of the buckets
 return [key 
 for bucket in self.buckets
 for key, _ in bucket.items()]
def values(self):
 """Return a list of all values in this hash table.
 Best and worst case running time: O(n^2)"""
 # Collect all values in each of the buckets
 return [value 
 for bucket in self.buckets
 for _, value in bucket.items()]

Please check if you can use bucket.keys() and bucket.values() directly, instead of looping over .items().

actually, can be applied for items() as well:

return [item
 for bucket in self.buckets
 for item in bucket.items()]

consider using Type Annotations. For instance, look how you've added this inline comment:
```
return entry is not None # True or False
```
With type hints, you could've removed the comment and add a return type as a hint:
```
def contains(self, key) -> bool:
```
unused loop variable. Since i is unused in the self.buckets = [LinkedList() for i in range(init_size)] expression, use _ as a variable name:
```
self.buckets = [LinkedList() for _ in range(init_size)]
```
format Documentation Strings properly. There are some recommendations like, add a newline into the beginning of a docstring if it consists of multiple lines, end it with a dot etc.

alecxe alecxe 17.5k8 gold badges52 silver badges93 bronze badges · Accepted Answer · 2017-12-04 12:52:03Z

Few high-level ideas and minor nitpicks:

have you considered json.dumps() to use for the HashTable's string representation

the keys() and values() methods can make use of a list comprehension:

def keys(self):
 """Return a list of all keys in this hash table.
 Best and worst case running time: O(n^2)"""
 # Collect all keys in each of the buckets
 return [key 
 for bucket in self.buckets
 for key, _ in bucket.items()]
def values(self):
 """Return a list of all values in this hash table.
 Best and worst case running time: O(n^2)"""
 # Collect all values in each of the buckets
 return [value 
 for bucket in self.buckets
 for _, value in bucket.items()]

Please check if you can use bucket.keys() and bucket.values() directly, instead of looping over .items().

actually, can be applied for items() as well:

return [item
 for bucket in self.buckets
 for item in bucket.items()]

consider using Type Annotations. For instance, look how you've added this inline comment:
```
return entry is not None # True or False
```
With type hints, you could've removed the comment and add a return type as a hint:
```
def contains(self, key) -> bool:
```
unused loop variable. Since i is unused in the self.buckets = [LinkedList() for i in range(init_size)] expression, use _ as a variable name:
```
self.buckets = [LinkedList() for _ in range(init_size)]
```
format Documentation Strings properly. There are some recommendations like, add a newline into the beginning of a docstring if it consists of multiple lines, end it with a dot etc.

Stack Exchange Network

Python Hash Table and insertion

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Python Hash Table and insertion

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions