I could not find a textbook implementation of a 2-3 tree in python, so I decided I will try to do mine.
class Node:
def __init__(self, data, parent=None):
self.nodeType = 2
self.d1, self.d2, self.d3 = data, None, None
self.c1, self.c2, self.c3, self.c4 = None, None, None, None
self.parent = parent
def push(self, data):
if self.nodeType == 2:
self.nodeType = 3
self.d1, self.d2 = sorted([self.d1, data])
elif self.nodeType == 3:
self.nodeType = 4
self.d1, self.d2, self.d3 = sorted([self.d1, self.d2, data])
def split(self):
# Case O, if there is nothing to do
if self.nodeType < 4:
return
# Case I, splitting when there is no parent
if self.parent == None:
leftChild = Node(self.d1, self)
rightChild = Node(self.d3, self)
leftChild.c1, leftChild.c2 = self.c1, self.c2
rightChild.c1, rightChild.c2 = self.c3, self.c4
self.nodeType = 2
self.d1, self.d2, self.d3 = self.d2, None, None
self.c1, self.c2, self.c3, self.c4 = leftChild, rightChild, None, None
# Case II, when parent is a 2-node
elif self.parent.nodeType == 2:
# subcase a: when the current node is the left child of the parent node
if self == self.parent.c1:
midChild = Node(self.d3, self.parent)
midChild.c1, midChild.c2 = self.c3, self.c4
self.parent.push(self.d2)
self.parent.c1, self.parent.c2, self.parent.c3 = self.parent.c1, midChild, self.parent.c2
self.nodeType = 2
self.c1, self.c2, self.c3, self.c4 = self.c1, self.c2, None, None
self.d1, self.d2, self.d3 = self.d1, None, None
# subcase b: when the current node is the right child of the parent node
elif self == self.parent.c2:
midChild = Node(self.d1, self.parent)
midChild.c1, midChild.c2 = self.c1, self.c2
self.parent.push(self.d2)
self.parent.c1, self.parent.c2, self.parent.c3 = self.parent.c1, midChild, self.parent.c2
self.nodeType = 2
self.c1, self.c2, self.c3, self.c4 = self.c3, self.c4, None, None
self.d1, self.d2, self.d3 = self.d3, None, None
# Case III, when parent is a 3-node
elif self.parent.nodeType == 3:
# subcase a: when the current node is the left child of the parent node
if self == self.parent.c1:
newNode = Node(self.d3, self.parent)
newNode.c1, newNode.c2 = self.c3, self.c4
self.parent.push(self.d2)
self.parent.c1, self.parent.c2, self.parent.c3, self.parent.c4 = self.parent.c1, newNode, self.parent.c2, self.parent.c3
self.nodeType = 2
self.c1, self.c2, self.c3, self.c4 = self.c1, self.c2, None, None
self.d1, self.d2, self.d3 = self.d1, None, None
# subcase b: when the current node is the middle child of the parent node
elif self == self.parent.c2:
newNode = Node(self.d3, self.parent)
newNode.c1, newNode.c2 = self.c3, self.c4
self.parent.push(self.d2)
self.parent.c1, self.parent.c2, self.parent.c3, self.parent.c4 = self.parent.c1, self.parent.c2, newNode, self.parent.c3
self.nodeType = 2
self.c1, self.c2, self.c3, self.c4 = self.c1, self.c2, None, None
self.d1, self.d2, self.d3 = self.d1, None, None
# subcase c: when the current node is the right node of the parent node
elif self == self.parent.c3:
newNode = Node(self.d1, self.parent)
newNode.c1, newNode.c2 = self.c1, self.c2
self.parent.push(self.d2)
self.parent.c1, self.parent.c2, self.parent.c3, self.parent.c4 = self.parent.c1, self.parent.c2, newNode, self.parent.c3
self.nodeType = 2
self.c1, self.c2, self.c3, self.c4 = self.c3, self.c4, None, None
self.d1, self.d2, self.d3 = self.d3, None, None
# now recursively split the parent
self.parent.split()
def insert(self, data):
# if this node is a leaf
if self.c1 == None:
self.push(data)
self.split()
# if this node is not a leaf, and a 2-node
elif self.nodeType == 2:
if data < self.d1:
self.c1.insert(data)
else:
self.c2.insert(data)
# if this node is a 3-node
elif self.nodeType == 3:
if data < self.d1:
self.c1.insert(data)
elif data > self.d3:
self.c3.insert(data)
else:
self.c2.insert(data)
def find(self, data):
# if this node is a leaf
if self.c1 == None:
if data in [self.d1, self.d2, self.d3]:
return True
else:
return False
# if this node is not a leaf, and a 2-node
elif self.nodeType == 2:
if data < self.d1:
self.c1.find(data)
else:
self.c2.find(data)
# if this node is a 3-node
elif self.nodeType == 3:
if data < self.d1:
self.c1.find(data)
elif data > self.d3:
self.c3.find(data)
else:
self.c2.find(data)
class TwoThreeTree:
def __init__(self):
self.isEmpty = True
self.root = None
def insert(self, data):
if self.isEmpty:
self.isEmpty = False
self.root = Node(data)
else:
self.root.insert(data)
def find(self, data):
if self.isEmpty:
return False
else:
self.root.find(data)
General feedback on the coding style is welcome. I am also looking for some specific feedback:
- This seems to be a lot of casework. How can I reuse more of my code to make it more elegant? I also believe that being able to organize this code more nicely means that I'd be able to generalize this for a more general B-Tree. How can I do that?
1 Answer 1
My impressions on reading the question:
I bet there's a WikiPedia article on 2-3 trees. But OP gave no link. :(
No doc block describing 2-3 trees
No invariant
Lots of, as you say, "case work" encoded in if/elif statements that should probably be subclasses.
Digging in:
Check the Docs
You have more fields than are needed. 2-3 tree nodes have either 2 children and 1 data field, or 3 children an 2 data fields. You have self.d3
and self.c4
in your initializer...
Own the node
You have a tree class and a node class. Move the node class(es) into the tree:
class TwoThreeTree:
'''Implement a 2-3 tree using pure Python 3.
See https://en.wikipedia.org/wiki/2-3_tree
'''
class _2node:
def __init__(self, data=None, left=None, right=None):
self.data = data
self.left = left
self.right = right
def insert(self, data):
...
def __init__(self):
self.root = None
def is_empty(self):
return self.root is None
def insert(self, data):
if self.is_empty():
self.root = self._2node(data)
else:
self.root = self.root.insert(data)
Simplify and Subclass
Once you've got the nodes inside the tree class, split them off. Python does duck-typing, so they don't need to share a common ancestor (although they might, if you find enough behavior to inherit).
class _2node:
def __init__(self, data, left=None, right=None):
...
def insert(self, ...):
...
class _3node:
def __init__(self, data1, data2=None, left=None, right=None):
...
def insert(self, ...):
...
You may wish to add a .parent
link pointing up the tree, to simplify the split code.
Add an Invariant
Write yourself a method (or methods) called invariant()
. Assert whatever things you find to assert - I think the wikipedia article has some pretty solid candidates. Call it when you're about to exit a public method, and whereever else you feel the need.
-
1\$\begingroup\$ I do not understand why having
_2node
and_3node
as seperate classes would help. \$\endgroup\$Agnishom Chattopadhyay– Agnishom Chattopadhyay2018年02月25日 06:17:41 +00:00Commented Feb 25, 2018 at 6:17 -
3\$\begingroup\$ Everywhere you have "if nodeType == 2" or "if nodeType==3" is a different behavior. If those were different classes, you just write the code and know what the nodeType is. \$\endgroup\$aghast– aghast2018年02月25日 06:40:44 +00:00Commented Feb 25, 2018 at 6:40