I'm writing C++ for the first time and wanted to implement a BST. I come from writing C, so I tend to write code in that way. I'm trying to wrap my head around smart pointers, but can't seem to get them to work properly. This version of the code is without any smart pointers. Any suggestions?
The C++ file:
#include <iostream>
#include "bst.h"
int main()
{
int i = 0;
BST<int> *tree = new BST<int>;
tree->addNode(new node<int>(6));
tree->addNode(new node<int>(5));
tree->addNode(new node<int>(2));
tree->addNode(new node<int>(5));
tree->addNode(new node<int>(7));
tree->addNode(new node<int>(8));
std::cout << "In order ";
tree->inOrderPrint(tree->getRoot());
std::cout << std::endl;
std::cout << "Pre order ";
tree->preOrderPrint(tree->getRoot());
std::cout << std::endl;
std::cout << "Post order ";
tree->postOrderPrint(tree->getRoot());
std::cout << std::endl;
std::cout << "Level order " << std::endl;
tree->levelOrderPrint(tree->getRoot());
std::cout << std::endl;
std::cout << "Minimum value " << tree->findMin(tree->getRoot())->getData();
std::cout << std::endl;
std::cout << "Maximum value " << tree->findMax(tree->getRoot())->getData();
std::cout << std::endl << std::endl;
std::cout << "Successor of 5: ";
if(node<int> *temp = tree->getSuccessor(tree->findNode(5)))
{
std::cout << temp->getData() << std::endl;
}
else
{
std::cout << "None" << std::endl;
}
std::cout << "Successor of 8: ";
if(node<int> *temp = tree->getSuccessor(tree->findNode(8)))
{
std::cout << temp->getData() << std::endl;
}
else
{
std::cout << "None" << std::endl;
}
std::cout << "Predecessor of 7: ";
if(node<int> *temp = tree->getPredecessor(tree->findNode(7)))
{
std::cout << temp->getData() << std::endl;
}
else
{
std::cout << "None" << std::endl << std::endl;
}
std::cout << "Predecessor of 2: ";
if(node<int> *temp = tree->getPredecessor(tree->findNode(2)))
{
std::cout << temp->getData() << std::endl;
}
else
{
std::cout << "None" << std::endl << std::endl;
}
std::cout << "Height: " << tree->getHeight(tree->getRoot()) << std::endl;
delete tree;
return 0;
}
The header file:
#define MAX(a, b) ({ __typeof__ (a) _a = (a); \
__typeof__ (b) _b = (b); \
_a > _b ? _a : _b; })
template <typename T>
class node
{
T data;
node<T> *left;
node<T> *right;
node<T> *parent;
public:
node(T value);
~node() {};
T getData();
void updateLeft(node<T>* pointer);
void updateRight(node<T>* pointer);
void updateParent(node<T>* pointer);
node<T>* getLeft();
node<T>* getRight();
node<T>* getParent();
};
template <typename T>
node<T> :: node(T value) : parent(NULL), left(NULL), right(NULL)
{
data = value;
}
template <typename T>
T node<T> :: getData()
{
return data;
}
template <typename T>
void node<T> :: updateLeft(node<T>* pointer)
{
left = pointer;
}
template <typename T>
void node<T> :: updateRight(node<T>* pointer)
{
right = pointer;
}
template <typename T>
void node<T> :: updateParent(node<T>* pointer)
{
parent = pointer;
}
template <typename T>
node<T>* node<T> :: getLeft()
{
return left;
}
template <typename T>
node<T>* node<T> :: getRight()
{
return right;
}
template <typename T>
node<T>* node<T> :: getParent()
{
return parent;
}
template <typename T>
class BST
{
node<T> *root;
void removeNode(node<T> *delNode);
public:
BST() : root(NULL) {}
~BST();
node<T>* getRoot();
node<T>* findMin(node<T> *current);
node<T>* findMax(node<T> *current);
node<T>* findNode(T key);
node<T>* getSuccessor(node<T> *current);
node<T>* getPredecessor(node<T> *current);
bool addNode(node<T> *newNode);
void inOrderPrint(node<T> *current);
void preOrderPrint(node<T> *current);
void postOrderPrint(node<T> *current);
void levelOrderPrint(node<T> *current);
void printLevel(node<T>* current, int level);
int getHeight(node<T>* current);
};
template <typename T>
void BST<T> :: removeNode(node<T> *delNode)
{
if(delNode)
{
removeNode(delNode->getLeft());
removeNode(delNode->getRight());
delete delNode;
}
}
template <typename T>
BST<T> :: ~BST()
{
removeNode(root);
}
template <typename T>
node<T>* BST<T> :: getRoot()
{
return root;
}
template <typename T>
bool BST<T> :: addNode(node<T> *newNode)
{
if(!root)
{
root = newNode;
}
else
{
node<T> *temp = root;
node<T> *parent = NULL;
while(temp)
{
parent = std::move(temp);
if(newNode->getData() > temp->getData())
{
temp = temp->getRight();
}
else
{
temp = temp->getLeft();
}
}
if(newNode->getData() > parent->getData())
{
parent->updateRight(newNode);
}
else
{
parent->updateLeft(newNode);
}
newNode->updateParent(parent);
}
return true;
}
template <typename T>
node<T>* BST<T> :: findNode(T key)
{
node<T> *temp = root;
while(temp)
{
T data = temp->getData();
if(data == key)
{
return temp;
}
else if(data > key)
{
temp = temp->getLeft();
}
else
{
temp = temp->getRight();
}
}
return NULL;
}
template <typename T>
void BST<T> :: inOrderPrint(node<T> *current)
{
if(current)
{
inOrderPrint(current->getLeft());
std::cout << current->getData() << " ";
inOrderPrint(current->getRight());
}
}
template <typename T>
void BST<T> :: preOrderPrint(node<T> *current)
{
if(current)
{
std::cout << current->getData() << " ";
inOrderPrint(current->getLeft());
inOrderPrint(current->getRight());
}
}
template <typename T>
void BST<T> :: postOrderPrint(node<T> *current)
{
if(current)
{
inOrderPrint(current->getLeft());
inOrderPrint(current->getRight());
std::cout << current->getData() << " ";
}
}
template <typename T>
void BST<T> :: printLevel(node<T>* current, int level)
{
if(current)
{
if(level == 0)
{
std::cout << current->getData() << " ";
}
else
{
printLevel(current->getLeft(), level-1);
printLevel(current->getRight(), level-1);
}
}
}
template <typename T>
void BST<T> :: levelOrderPrint(node<T> *current)
{
int height = getHeight(current);
for(int level = 0; level < height; level++)
{
printLevel(current, level);
std::cout << std::endl;
}
}
template <typename T>
node<T>* BST<T> :: findMin(node<T> *current)
{
node<T> *temp = current;
node<T> *prev;
while(temp)
{
prev = temp;
temp = temp->getLeft();
}
return prev;
}
template <typename T>
node<T>* BST<T> :: findMax(node<T> *current)
{
node<T> *temp = root;
node<T> *prev;
while(temp)
{
prev = temp;
temp = temp->getRight();
}
return prev;
}
template <typename T>
node<T>* BST<T> :: getSuccessor(node<T> *current)
{
if(current->getRight())
{
return findMin(current->getRight());
}
node<T> *parent = current->getParent();
while(parent && current == parent->getRight())
{
current = parent;
parent = parent->getParent();
}
return parent;
}
template <typename T>
node<T>* BST<T> :: getPredecessor(node<T> *current)
{
if(current->getLeft())
{
return findMax(current->getLeft());
}
node<T> *parent = current->getParent();
while(parent && current == parent->getLeft())
{
current = parent;
parent = parent->getParent();
}
return parent;
}
template <typename T>
int BST<T> :: getHeight(node<T> *current)
{
int max = 0;
if(!current)
{
return 0;
}
max = MAX(getHeight(current->getLeft()), getHeight(current->getRight()));
return (1 + max);
}
-
\$\begingroup\$ We cannot assist with adding smart pointers, but we can review everything else (if it all works). \$\endgroup\$Jamal– Jamal2014年11月25日 23:38:30 +00:00Commented Nov 25, 2014 at 23:38
-
\$\begingroup\$ Everything works. Just wanted to know if I'm following best practices. \$\endgroup\$user3666471– user36664712014年11月25日 23:39:58 +00:00Commented Nov 25, 2014 at 23:39
-
\$\begingroup\$ Can you use / your compiler supports C++11? \$\endgroup\$glampert– glampert2014年11月26日 01:07:37 +00:00Commented Nov 26, 2014 at 1:07
-
2\$\begingroup\$ Yes, I can use C++11. \$\endgroup\$user3666471– user36664712014年11月26日 02:58:13 +00:00Commented Nov 26, 2014 at 2:58
-
\$\begingroup\$ This will be a fun review. I cannot do it now, but I'll do it in 8ish hours if it has not been done by then. \$\endgroup\$nwp– nwp2014年11月26日 09:57:34 +00:00Commented Nov 26, 2014 at 9:57
2 Answers 2
No complete review, but some points to mention (in no specific order). When I speak in imperatives, consider them always as suggestions:
General things:
Don't use this macro:
#define MAX(a, b) ({ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ _a > _b ? _a : _b; })
Rather, use the
std::max
function template defined in thealgorithm
header:std::max(a,b);
Use
nullptr
instead ofNULL
.
Smart pointers:
Regarding the design of your node
class, here are two suggestions, one using std::unique_ptr
the other std::shared_ptr
.
Using std::unique_ptr:
First, the unique_ptr
alternative. For this, you can use a node
class as follows:
template <typename T>
class node
{
T data;
node *parent;
// or:
// node * const parent;
std::unique_ptr<node> right;
std::unique_ptr<node> left;
}
Note that I didn't use node<T>
but rather only node
inside the class. This is because inside a class template, the class name refers to its current instantiation.
Choice of
left
andright
:The child nodes
left
andright
now areunique_ptr
's, which states that thenode
exclusively owns its childs. This is convenient, because, say, when you remove the node from the tree, you usually want all child and grand-child nodes below that erased node to disappear as well. By usingstd::unique_ptr
, you get this for free. Once more, this is the advantage in following the RAII principle: the class manage it's memory ressources itself, so you don't have to care for that manually (as you do via the functionremoveNode
). When you use raw pointers instead, this will become much more difficult and error-prone.Choice of
parent
:On the other hand,
parent
is chosen as a raw pointer with the intention to act solely as an observer. It relies on the fact that thenode
object of which it is a member is also correctly managed by aunique_ptr
. So it will always point to a valid object. Note that you should never do some memory-related operations using parent.As it is also unlikely that child nodes have to change its parent (and since strictly speaking it is forbidden without also changing the managing
unique_ptr
), you can also declare the raw pointer asconst
bynode * const parent;
. Note that theconst
here comes after the*
which means the pointer itself is const (not the object it points to). But changingparent
(in a reasonable way) might also be needed in in rare occasions, so it is maybe too restrictive to generally suggest this.Copying, assignment, etc.
Note that you can't copy the tree with nodes as defined above (at least you won't get it for free, i.e. the copy constructor is not automatically generated by the compiler), which is because
std::unique_ptr
's copy constructor isdelete
'd. So there will always exist only one valid tree (which is usually passed by reference). If you want copies, you can either implement a deep copy, by which you get a completely new tree (--if you change the copy, the original tree will not change). Or you can return anobserver_node
class which resembles yours above (in which you should never do any memory-related operations such asdelete
):template <typename T> class observer_node { T data; node<T> *left; node<T> *right; node<T> *parent; };
But I doubt that I really would do this. The copied-node is read-only (with regard to the topology, i.e. you cannot add or remove nodes) and, in order to work, it poses severe restrictions on the original tree. For instance, one cannot reduce the number of nodes, as then the copied-node will contain a dangling pointer.
For the assignment-- which is usually implemented via the copy-and-swap idiom and thus in terms of the copy constructor -- similar things hold.
So, if you really want to be able to do pointer-copying, you should consider the next alternative.
Using std::shared_ptr:
Here the class looks like this:
template <typename T>
class node : public std::enable_shared_from_this<node<T> >
{
T data;
node *parent;
// or:
// std::weak_ptr<node> parent
std::shared_ptr<node> right;
std::shared_ptr<node> left;
}
Choice of
left
andright
.Now, the child node pointers are
shared_ptr
. They do not exclusively own the object they point to.Copying, etc:
The implications are best worked out by considering the copy behaviour (for whose accomplishment you will get compiler-generated copy, assignment, etc. constructors). Now, copying works as following: a new tree of nodes is generated which point to the same memory locations on the heap. In the copied tree, you can safely change the topology of the tree, i.e. add or remove nodes -- the
shared_ptr
will always take care that the pointed-to objects survive as long as at least one node is pointing to them. You must be careful, though, when you change the memberdata
in the copied tree, as this will change also the value in the original tree.Choice of
parent
parent
again can be chosen as an only-observing raw pointer. The rationale is the same way as before: it relies on the fact that the parent node is managed by anothershared_ptr
and thus will always exist.If you wanted to be sure about that, you could also use a
weak_ptr
here, but I don't think it is necessary.
Oh man, my reviews always get so long ... hope it is helpful and that you even managed to get here.
-
\$\begingroup\$ One might want to be able to split a tree in two or merge two trees into a larger one. Then the parent pointer would need updating. \$\endgroup\$Goswin von Brederlow– Goswin von Brederlow2014年11月26日 16:55:26 +00:00Commented Nov 26, 2014 at 16:55
-
\$\begingroup\$ With
std::shared_ptr
I would usestd::enable_shared_from_this
for nodes. That way a node an return a shared pointer to itself to return a subtree without having to go through the parent. This avoids having to special case the root and checking if you need the left or right child. \$\endgroup\$Goswin von Brederlow– Goswin von Brederlow2014年11月26日 16:58:35 +00:00Commented Nov 26, 2014 at 16:58 -
\$\begingroup\$ @GoswinvonBrederlow: yes, deriving from
std::enable_shared_from_this
is a reasonable thing, added this. I had the same feeling about theparent
pointer, so I recommended it to be non-const in general. \$\endgroup\$davidhigh– davidhigh2014年11月26日 17:03:47 +00:00Commented Nov 26, 2014 at 17:03 -
\$\begingroup\$ Small error,
std::max
is actually defined by<algorithm>
not by<cmath>
as you have stated. \$\endgroup\$glampert– glampert2014年11月26日 17:09:28 +00:00Commented Nov 26, 2014 at 17:09 -
\$\begingroup\$ @glampert: right, thanks for correction. This is something I quite often stumbled upon... and were always surprised. Corrected. \$\endgroup\$davidhigh– davidhigh2014年11月26日 17:14:11 +00:00Commented Nov 26, 2014 at 17:14
I assume you read the other answer already.
Your
MAX
macro is inefficient because it copies its arguments.std::max
does not have that problem. Alsodecltype
is a portable version of__typeof__
and in this case you could have usedauto
instead.int main(){ BST<int> *tree = new BST<int>; ... delete tree; }
Putting variables on the stack makes managing memory easier than with smart pointers:
int main(){ BST<int> tree; ... }
Exception safety, less code and less awkward syntax are an additional bonus.
In
main
you defineint i;
but never use it. Your compiler should warn you about that. Define variables as late as possible instead of defining all variables at the beginning of a function.As a user of your class I do not care about
node
s. If I have to handlenode
s I may as well write my own tree instead of using yours.node
is an implementation detail.class BST{ struct Node{ T data; ... }; public: ... };
Now
Node
isprivate
and not accessible for users. Instead ofbool addNode(node<T> *newNode);
we now havevoid insert(const T &t);
andvoid insert(T &&t);
. If the&&
confuses you read about move semantics. Internally it does basically the same thing asaddNode
did before. The member functions you have becomeprivate
and we add new member functions:template <class T> class BST{ ... public: const T &min() const; const T &max() const; bool contains(const T &key) const; void inOrderPrint() const; void preOrderPrint() const; void postOrderPrint() const; void levelOrderPrint() const; int getHeight() const; void remove(const T &t); ... };
Those functions also do what they did before, just more convenient to use.
BST<int> *tree = new BST<int>; tree->addNode(new node<int>(6)); tree->addNode(new node<int>(5)); tree->addNode(new node<int>(2)); tree->addNode(new node<int>(5)); tree->addNode(new node<int>(7)); tree->addNode(new node<int>(8));
This sucks. You want it to look something like this:
BST<int> tree = { 6, 5, 2, 5, 7, 8 };
To do that you add a constructor that takes an
initializer_list
and inserts all the elements into the tree:BST(const std::initializer_list<T> &il){ for (auto &e : il) insert(e); } BST(std::initializer_list<T> &&il){ for (auto &e : il) insert(std::move(e)); }
You need to
#include <initializer_list>
for this to work.I would add copy assignment operators and assignment operators for
initializer_list
:BST &operator =(BST &&other); BST &operator =(const BST &other); BST &operator =(std::initializer_list<T> &&il); BST &operator =(const std::initializer_list<T> &il);
Consider implementing iterators. An iterator is similar to a pointer, you get the beginning with
auto it = bst.begin();
and the next element with++it;
. Whenit == bst.end();
you are done iterating. The point is that you can separate algorithms such asinOrderPrint
and containers such asBST
. Instead of implementingn
algorithms form
containers with a total ofn * m
implementations you need onlyn + m
implementations if you use iterators and all the neat algorithms such asstd::find_if
andstd::accumulate
will work on yourBST
. It is quite a bit of work to implement iterators, especially since you will probably need to implement 3 kinds for in-order, pre-order and post-order. Once you have those iterators and add support for streams your tree becomes fun to work with:std::ifstream fin("myTree.txt"); BST<int> bst; fin >> bst; //read tree from file std::cout << bst; //print tree //dumping tree to disk std::ofstream fout("preordertree.txt"); for (auto it = bst.preOrderBegin(); it != bst.preOrderEnd(); ++it) fout << *it << '\n';
When you get bored adding features to your BST
take a close look at std::set
and std::vector
+ std::sort
+ std::lower_bound
.
-
\$\begingroup\$ Nice complimentary review, +1. Regarding the
initializer_list
stuff: although it is a good thing in general, I think thattree = { 6, 5, 2, 5, 7, 8 }
is not really intuitive: in order to know the distribution of values to the nodes I need to know powers of two. There might be applications where you simply insert somewhere all values you need, but in others this distribution matters, and then the initializer_list is error-prone ... I'm indecisive whether I would add it, with a slight tendency to yes (--lastly we do C++ and users have to know what they do ;-)). Just wanted to mention that. \$\endgroup\$davidhigh– davidhigh2014年11月27日 01:08:08 +00:00Commented Nov 27, 2014 at 1:08 -
\$\begingroup\$ Next, can you comment what you mean exactly by inOrder, preOrder and postOrder iterators? What exactly does
++
for them? Basically, I can think of several alternatives to traverse the tree, with "layer-by-layer from left-to-right" being probably the easiest to understand. Which one did you mean? \$\endgroup\$davidhigh– davidhigh2014年11月27日 01:22:02 +00:00Commented Nov 27, 2014 at 1:22 -
\$\begingroup\$ @davidhigh I did not understand the power of two part. When iterating through values the values come in some order, usually in sequential order for
vector
-like containers or from lowest to highest forset
-like containers. The question contained code forinOrderPrint
,preOrderPrint
,postOrderPrint
andlevelOrderPrint
which one could mimic with different iterator types. Operator ++ would then get the next element based on the order encoded in its type. \$\endgroup\$nwp– nwp2014年11月27日 02:05:37 +00:00Commented Nov 27, 2014 at 2:05 -
\$\begingroup\$ the power-of-two part was meant like this:
tree = {1}
means there is only the root node with a value of1
.tree = {1,2,3}
means there are further left and right childs of the root node.tree = {1,2}
means onlyleft
is assigned (--first question: is this really desired? often either none or both children are assigned). When I now want to assign,say,7
to theroot->left->left->right
node, I must think a while where to insert a7
in theinitializer_list
. \$\endgroup\$davidhigh– davidhigh2014年11月27日 02:18:14 +00:00Commented Nov 27, 2014 at 2:18 -
\$\begingroup\$ this touches also my other question about iterators: in order to use an
initializer_list
, one needs an intuitive way to traverse the storage scheme (--the same holds for an iterator). So, doestree={1,2,3}
meansroot, root->left, root->right
are assigned, or does it meanroot, root->left, root->left->left
, or something completely different? These questions must be answered in order to use both concepts (iterators and initializer_lists) unambiguously. \$\endgroup\$davidhigh– davidhigh2014年11月27日 02:22:13 +00:00Commented Nov 27, 2014 at 2:22