Here is my code to serialize and deserialize binary tree, which may have a lot of empty nodes. I am written in Python 2.7. Any advice on performance improvement in terms of algorithm time complexity, bugs or code style advice is appreciated.
My major idea is, serialize only non-empty nodes, with node ID information, where left node ID * 2 = parent node ID
, and right node ID * 2 + 1 = parent node ID
Source code in Python 2.7,
from collections import defaultdict
class TreeNode:
def __init__(self, value, left, right):
self.value = value
self.left = left
self.right = right
def serialize_tree(self, result):
stack = []
stack.append((self, 1))
while len(stack) > 0:
node, index = stack.pop(0)
result.append((node.value, index))
if node.left:
stack.append((node.left, index*2))
if node.right:
stack.append((node.right, index*2+1))
@staticmethod
def deserialize_tree(source):
result = []
for (node_value, index) in source:
node = TreeNode(node_value, None, None)
result.append(node)
if index == 1:
continue
elif index % 2 == 0:
result[index / 2 - 1].left = node
else:
result[(index - 1) / 2 - 1].right = node
return result[0]
def print_tree(self, result):
result.append(self.value)
if self.left:
self.left.print_tree(result)
else:
result.append('#')
if self.right:
self.right.print_tree(result)
else:
result.append('#')
if __name__ == "__main__":
root = TreeNode(1, TreeNode(2, None, TreeNode(4, None, None)), TreeNode(3, None, None))
result = []
root.serialize_tree(result)
print result
new_root = TreeNode.deserialize_tree(result)
result2 = []
new_root.print_tree(result2)
print result2
-
2\$\begingroup\$ Do you have the following requirements: (1) the tree is a binary search tree (i.e. is sorted)? (2) Do you expect the deserialized tree to be of the same shape as the serialized tree? \$\endgroup\$coderodde– coderodde2017年01月31日 07:09:28 +00:00Commented Jan 31, 2017 at 7:09
-
\$\begingroup\$ @coderodde, good question, no requirement for binary search tree, just a normal binary tree. \$\endgroup\$Lin Ma– Lin Ma2017年01月31日 07:29:54 +00:00Commented Jan 31, 2017 at 7:29
-
\$\begingroup\$ @coderodde, for your question -- "Do you expect the deserialized tree to be of the same shape as the serialized tree", definitely I expect they are the same after serialize, and deserialize, because serialization and deserialization is just like compression, do not want to lose any information. Your idea of my original question is highly appreciated. \$\endgroup\$Lin Ma– Lin Ma2017年01月31日 07:30:58 +00:00Commented Jan 31, 2017 at 7:30
1 Answer 1
I would use a map mapping each key of a node \$u\$ in the tree to a 2-tuple containing the key of the parent \$p\,ドル and a boolean indicating whether \$u\$ is a left or right child of \$p\$:
class TreeNode:
def __init__(self, key):
self.key = key
self.left = None
self.right = None
def __eq__(self, other):
return self.key == other.key
def __hash__(self):
return self.key
def serialize_tree(root_node):
map_node_key_to_parent_data = {}
def serialize_tree_impl(node, parent_node):
if parent_node:
parent_data = (parent_node.key, parent_node.left is node)
map_node_key_to_parent_data[node.key] = parent_data
else:
map_node_key_to_parent_data[node.key] = None
if node.left:
serialize_tree_impl(node.left, node)
if node.right:
serialize_tree_impl(node.right, node)
serialize_tree_impl(root_node, None)
return map_node_key_to_parent_data
def deserialize_tree(map_node_key_to_parent_data):
map_node_key_to_tree_node = {}
for node_key in map_node_key_to_parent_data.keys():
if node_key not in map_node_key_to_tree_node.keys():
map_node_key_to_tree_node[node_key] = TreeNode(node_key)
else:
raise TypeError("Duplicate node in the tree.")
root_node = None
for node_key, parent_data in map_node_key_to_parent_data.items():
node = map_node_key_to_tree_node[node_key]
if not parent_data:
root_node = node
else:
parent_node_key = parent_data[0]
on_left = parent_data[1]
parent_node = map_node_key_to_tree_node[parent_node_key]
if on_left:
parent_node.left = node
else:
parent_node.right = node
return root_node
def binary_search_trees_are_identical(root_node1, root_node2):
if not root_node1 and not root_node2:
return True
if not root_node1 or not root_node2:
return False
if root_node1.key != root_node2.key:
return False
return binary_search_trees_are_identical(root_node1.left, root_node2.left)\
and binary_search_trees_are_identical(root_node2.right, root_node2.right)
def main():
root_node = TreeNode(5);
l = TreeNode(3)
r = TreeNode(7)
rr = TreeNode(10)
lr = TreeNode(4)
rrl = TreeNode(11)
rr.left = rrl
root_node.left = l
root_node.right = r
l.right = lr
r.right = rr
serialized_tree = serialize_tree(root_node)
print serialized_tree
deserialized_tree_root_node = deserialize_tree(serialized_tree)
print "Trees are identical:", binary_search_trees_are_identical(root_node, deserialized_tree_root_node)
pass
if __name__ == "__main__":
main()
A bug?
On
root = TreeNode(1, TreeNode(2, TreeNode(3, TreeNode(4, TreeNode(5, None, None), None), None), None), None)
I get an IndexError.
Hope that helps.
Explore related questions
See similar questions with these tags.