I implemented a Python Hash Table using only lists. Here is my implementation:
class HashTable:
# Initialize the table with a fixed size of 54 slots
def __init__(self):
self.Table = [None] * 54
def _get_value(self, key): #This is my hash function.
# It finds the hash of every string. total % 54
total = hash(key)
return total % 54
def insert(self, key):
val = self._get_value(key)
col = False #Collision bool
index = 0
if self.Table[val] == None: #Empty slot - turn into list of keys to avoid extra cases
self.Table[val] = [key]
else: #Collision - append
self.Table[val].append(key)
col = True
index = len(self.Table[val]) - 1
return val, col, index
def delete(self, key):
val = self._get_value(key)
if self.Table[val] == None: #Deleting an unexisting element
return -1, 0
elif key in self.Table[val]: #This is the O(n) part of the hashtable
index = self.Table[val].index(key)
self.Table[val].remove(key)
return val, index
else: # No match was found in list, element does not exist
return -1, 0
def lookup(self, key):
val = self._get_value(key)
if self.Table[val] == None:
return -1, 0
else:
if key in self.Table[val]:
index = self.Table[val].index(key)
return val, index
# No match was found in list, element does not exist
return -1, 0
def clear(self):
self.__init__()
I am using and returning some variables to keep track of collision, values, and indexes to print to the user.
I want to understand how I could improve this implementation - still using only lists (no sets for checking). From a time complexity perspective or even code design, how can I make this more efficient? What design choices were poorly made in my code? What can I improve in this to make it better?
1 Answer 1
Restructuring and micro-optimization
Hard-coded table size 54
is better defined as class constant:
T_SIZE = 54
Encapsulate the crucial list/table at least as protected property self._Table
.
Calling self._get_value(key)
and the condition if self._Table[val] == None
are repeated in most crucial methods. To reduce that noisy repetition an additional method can be defined which will return a tuple of calculated value val
and is_empty
("empty slot" flag):
def _check_value(self, key):
val = self._get_value(key)
is_empty = self._Table[val] is None
return val, is_empty
It doesn't make sense to construct if ... else
conditional if the 1st if
branch return
's immediately.
Both delete
and lookup
methods, on existing item, will perform 2 access/lookup operations on self._Table[val]
:
key in self._Table[val]
self._Table[val].index(key)
To reduce access operations there we can apply try/except
trick and returning the needed result in each separate block. See the final implementation below:
class HashTable:
T_SIZE = 54
# Initialize the table with a fixed size
def __init__(self):
self._Table = [None] * HashTable.T_SIZE
def _get_value(self, key):
total = hash(key)
return total % HashTable.T_SIZE
def _check_value(self, key):
val = self._get_value(key)
is_empty = self._Table[val] is None
return val, is_empty
def insert(self, key):
val, is_empty = self._check_value(key)
col = False # Collision flag
index = 0
if is_empty: # Empty slot - turn into list of keys to avoid extra cases
self._Table[val] = [key]
else:
self._Table[val].append(key) # Collision - append
col = True
index = len(self._Table[val]) - 1
return val, col, index
def delete(self, key):
val, is_empty = self._check_value(key)
if is_empty: # Deleting unexisting element
return -1, 0
try:
index = self._Table[val].index(key)
self._Table[val].remove(key)
return val, index
except ValueError: # No match was found in list, element does not exist
return -1, 0
def lookup(self, key):
val, is_empty = self._check_value(key)
if is_empty:
return -1, 0
try:
index = self._Table[val].index(key)
return val, index
except ValueError: # No match was found in list, element does not exist
return -1, 0
def clear(self):
self.__init__()
In case if _get_value
method won't be used in standalone context - you may easily inline it into _check_value
method.
Sample usage:
h = HashTable()
h.insert('abc')
h.insert('abc')
print(h.lookup('abc'))
print(h.lookup('1abc'))
print(h.delete('abc'))
print(h.delete('abc'))
print(h.delete('abc'))
The output (consecutively):
(8, 0)
(-1, 0)
(8, 0)
(8, 0)
(-1, 0)