I am creating a huffman class that can encode and decode text but I am having trouble with my decode method. My encoding method works fine and my decoding method works for smaller amounts of text. But when I try to decode large amounts of text I get a maximum recursion depth error and am not sure how to fix it. The class takes in a dictionary with characters and their frequencies and then turns them into nodes and builds the tree. After building the tree it puts the characters and their bitcode into another dictionary to be used for encoding and decoding.
class Node:
def __init__(self,value,data,left=None,right=None):
#Holds frequency
self.value = value
#Holds character
self.data = data
self.left = left
self.right = right
self.code = ""
def __lt__(self, other):
return self.value < other.value
def __le__(self, other):
return self.value <= other.value
def __gt__(self, other):
return self.value > other.value
def __ge__(self, other):
return self.value >= other.value
class MyHuffman():
def __init__(self):
self.chars = {}
self.tree = None
self.decodePosition = 0
def build(self, weights):
huffNodes = []
heapq.heapify(huffNodes)
for x, y in weights.items():
heapq.heappush(huffNodes,Node(y,x))
while len(huffNodes) > 1:
left = heapq.heappop(huffNodes)
right = heapq.heappop(huffNodes)
left.code = 1
right.code = 0
heapq.heappush(huffNodes, Node(left.value+right.value, left.data + right.data, left, right))
self.tree = huffNodes[0]
self.makeLookupTable(self.tree)
def makeLookupTable(self, node, bitCode=""):
codes = bitCode + str(node.code)
if node.left == None and node.right == None:
self.chars[node.data] = codes
else:
self.makeLookupTable(node.left, codes)
self.makeLookupTable(node.right, codes)
return self.chars
def encode(self, word):
bitString = ""
for x in range (0, len(word)):
if word[x] in self.chars:
for y, z in self.chars.items():
if word[x] == y:
bitString = bitString + z
return bitString
def decode(self, bitstring):
for x in bitstring:
if x != "0" and x != "1":
return None
dec = self.recursiveTraverseTree(self.tree,bitstring)
self.decodePosition=0
return dec
def recursiveTraverseTree(self, node, bitString, words=""):
if node.left == None and node.right == None:
words= words + str(node.data)
node = self.tree
if self.decodePosition < len(bitString):
if bitString[self.decodePosition] == "0":
self.decodePosition+=1
return self.recursiveTraverseTree(node.right, bitString, words)
elif bitString[self.decodePosition] == "1":
self.decodePosition+=1
return self.recursiveTraverseTree(node.left, bitString, words)
return words
Some test runs below The first test works fine but the second gives a maxRecursion depth error
huff = MyHuffman()
freqs = {'z': 50, 'j': 1, "'": 11, 'a': 42, 'd': 3, 'l': 1, 'f': 14, ' ': 31, 'i': 1, 'w': 8, 's': 41, 'r': 2, 'k': 49, 'b': 28, 'q': 28, 'p': 32, 'g': 33, 'v': 51, 'c': 6, 'h': 6, 'm': 5, 'y': 8, 'o': 36, 't': 28, 'u': 23, 'n': 15}
b = huff.build(freqs)
word = "damn son"
bitstring = huff.encode(word)
decrypted = huff.decode(bitstring)
print(f"[{word}] -> [{bitstring}] -> [{decrypted}]")
#Outputs[damn son] -> [1000011001110000101000110010100010110001] -> [damn son]
word1 ="bgzqbbvkqpaawaoasczz nfq szbumq'vzmbvbknsvvqouapcpbzkvbgtbsggcohto p tzhytz kkutanbv ubsbaogasznr tzz pzzsgqfqpsnfugsktpuzztq s vkfzavokoogmvzgpk tagkz zaoz'vaqpqkvbuvqtsbzgf zk kuzasovhauoqwtvvvovko fvbwhzpvpkskouaupspstbvgsbszipqvvuvmuaspzna stvvk gu saaokggnmsvtspkvqvsahvozsawszfpws skgg bqkqu salg fovuknaknpupovafbovqssagpgbfkcuz gf ofgyrokasgc guabqzbtkosqzbzvspzwgnsyfhvoz akgo'htsovzpakabayffpkpkvqrpzzqogsfvvatsvqbaapozavuyovzpbzsz ppuzrqbq'jaq zuqhhpptvkguktcbkazsszsgvocppzptzzhtzgt b mkznz qqk avkmztzsbzkgrovkqsssb pvtvssoutssazasunaavgybffppqgqagbsa gqscwpno'pb qsknzagtu pztqfbmbztmtuzktvza gaovapkvav govpv oazg'qgrpyszvqqzvgvztbavsy pswurtauztkztavv zcqvzs gbt zgzosfvtpk'gyggbokt ktgukzgskfzf ntavpq bzazvtphvcfba knp'zg'vsyqtvuopz tvzks fn'boaakyvskgvgqggqz tknqbbbvskavkgqpskkkvapca rvzpkksvw'p spvhbzfgggzz'fopfsngoujykutkapbvtqzkkaanpogpnozohvqgfwkdpkvgdpouku v zpkkonuzks oznspqz'aszvnt v ytkcptstkftcknfksbkqszqht z wmpafzwf vkofvadvaqagqzpnzavvuzqkau nqobvggzp qv gup fokkbsoaqkoz svu uvzqzzpyfwuq ozszkzspkavsvta tgovt zpyqvpkzpbnvbsakkgvaktkqwukozp zskzr a aobzopg qtmkakk g nz vgagaktwv tptw bfqmsogzhhkpzwaza tokcbta kzzutvzk zvkunqoowako zabvvkqu'zb kp kvakvthgytvsvstpbvngvskgaqnfkokwozbztgszh'pgbopsgdnvzvozzgvsvgpvuzbuvkzat"
bitstring1 = huff.encode(word1)
decrypted1 = huff.decode(bitstring1)
print(f"[{word1}] -> [{bitstring1}] -> [{decrypted1}]")
#Gives maximum recursion depth error
2 Answers 2
This is a sketch (untested), which rewrite decode in loop and keep tree search in recursion.
def decode(self, bitstring):
for x in bitstring:
if x != "0" and x != "1":
return None
pos = 0
size = len(bitstring)
dec = ""
while pos < size:
pos, word = self.decode_one(self.tree, bitstring, pos)
dec += word
self.decodePosition = 0
return dec
def decode_one(self, node, bitstring, pos):
"""Return a tuple of the smallest unused position and the word"""
if node.left == None and node.right == None:
return pos, node.data
elif bitstring[pos] == "0":
return self.decode_one(node.right, bitstring, pos + 1)
elif bitstring[pos] == "1":
return self.decode_one(node.left, bitstring, pos + 1)
Comments
word1 has a length of 1260. Huffman code uses at least 1 bit per letter. As a result bitstring1 is at least 1260 bits long. decode recurses once for every bit decoded, or at least 1260 times. That is more than Python's default limit of 1000, so you get the error.
3 Comments
sys.setrecursionlimit(x) where x is your new limit. Say, 2000?
decodein loops instead of recursion entirely or partially.