I have tried to implement my Binary Search Tree in C. The following are the 18 operations defined:
- create a bst
- is_empty
- insert
- delete
- clear
- find
- find_min
- find_max
- height
- size
- depth
- level order traversal
- preorder traversal
- inorder traversal
- postorder traversal
- inorder successor
- is_bst (is the tree a binary search tree)
- is_bst_balanced (is the binary search tree balanced)
This was an important phase in my coding skills to understand what recursion really is. I usually have a table of returns and a recursive call stack both drawn to track the running of recursion, and that helped immensely to grasp the background work of recursion. If you find any improvement to be said about some recursive functions in this BST implementation, I would be grateful to read them through.
This is the entire code. I have included my Queue because I needed for the level_order
function.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define max(x, y) (((x) > (y)) ? (x) : (y))
typedef struct Node {
int data;
struct Node* left;
struct Node* right;
} Node;
//---------------Queue-----------------
typedef struct QNode {
Node* data;
struct QNode* next;
} QNode;
typedef struct Queue {
int size;
QNode* head;
QNode* tail;
} Queue;
const Queue queue_init = { .size = 0, .head = NULL, .tail = NULL };
QNode* create_node(Node* elm) {
QNode* node = malloc(sizeof * node);
if (!node) return node;
node->data = elm;
node->next = NULL;
return node;
}
int is_empty_q(Queue *q) {
return q->tail == NULL;
}
QNode* tail_prev(Queue *q) {
QNode* node = q->head, *prev = NULL;
while (node->next) {
prev = node;
node = node->next;
}
return prev;
}
void enqueue(Queue *q, Node* elm) {
QNode* updated_head = create_node(elm);
if (!q->head) {
q->head = updated_head;
q->tail = q->head;
}
else {
updated_head->next = q->head;
q->head = updated_head;
}
q->size++;
}
Node* dequeue(Queue *q) {
if (!is_empty_q(q)) {
QNode* node = q->tail;
Node* elm = q->tail->data;
q->tail = tail_prev(q);
if (q->tail) {
q->tail->next = NULL;
}
else {
q->head = NULL;
}
free(node);
q->size--;
return elm;
}
return NULL;
}
Node* front(Queue *q) {
Node* front;
if (q->tail)
front = q->tail->data;
else
front = NULL;
return front;
}
void clear_q(Queue *q) {
while (q->tail)
dequeue(q);
printf("Queue Cleared");
}
//---------------BST------------------
typedef struct BST {
Node* root;
} BST;
const BST bst_init = { .root = NULL };
Node* create_node_bst(int elm) {
Node* node = malloc(sizeof * node);
if (!node) return node;
node->data = elm;
node->left = node->right = NULL;
return node;
}
BST* create_bst() {
BST* bst = malloc(sizeof * bst);
if (!bst) return bst;
bst->root = NULL;
return bst;
}
int is_empty(Node* root) {
return root == NULL;
}
Node* insert(Node* root, int elm) { // -V
if (!root) {
root = create_node_bst(elm);
}
else if (elm <= root->data) {
root->left = insert(root->left, elm);
}
else {
root->right = insert(root->right, elm);
}
return root;
}
Node* find(Node* root, int elm) { // -V
if (!is_empty(root)) {
if (!root) {
root = NULL;
}
else if (root->data == elm) {
root = root;
}
else if (elm <= root->data) {
root = find(root->left, elm);
}
else {
root = find(root->right, elm);
}
return root;
}
else
return root;
}
Node* find_max(Node* root) { //-V
if (!is_empty(root)) {
if (root->right == NULL)
return root;
else {
return find_max(root->right);
}
}
else
return root;
}
Node* find_min(Node* root) { //-V
if (!is_empty(root)) {
if (!root->left)
return root;
else {
return find_min(root->left);
}
}
else
return root;
}
int height(Node* root) {
if (root == NULL) {
return -1; // 0 if heighe is number of edges, or -1 if height=number of edges
}
int left_height = height(root->left);
int right_height = height(root->right);
return max(left_height, right_height) + 1;
}
int depth(Node* root, int elm) {
if (root->data == elm) {
return 0;
}
else if (elm < root->data) {
return depth(root->left, elm) + 1;
}
else {
return depth(root->right, elm) + 1;
}
}
Node* delete(Node* root, int elm) {
if (root == NULL)
return root;
else if (elm > root->data)
root->right = delete(root->right, elm);
else if (elm < root->data)
root->left = delete(root->left, elm);
else { // elm found
if (root->left == NULL && root->right == NULL) {
free(root);
root = NULL;
}
else if (root->left == NULL) {
Node* temp = root;
root = root->right;
free(temp);
}
else if (root->right == NULL) {
Node* temp = root;
root = root->left;
free(temp);
}
else { //this case is done until it is reduced to one of the previous three cases
Node* temp = find_min(root->right);
root->data = temp->data;
root->right = delete(root->right, elm);
}
}
return root;
}
int is_bst(Node* root, int min, int max) { // solution 1
if (root == NULL) {
return 1;
}
else if (root->data < max && root->data > min && is_bst(root->left, min, root->data) && is_bst(root->right, root->data, max))
return 1;
else
return 0;
} // solution 2, traverse inorder and check if the list is sorted
int is_bst_balanced(Node* root) {
int is_balanced = 1;
int left_height = height(root->left);
int right_height = height(root->right);
if (abs(right_height - left_height) > 1)
is_balanced = 0;
return is_balanced;
}
int size(Node* root) {
if (!root)
return 0;
int left_size = size(root->left);
int right_size = size(root->right);
return left_size + right_size + 1; // + 1 is for the ancesstor
}
void level_order(Node* root) { // visit all children before grand children
if (!is_empty(root)) {
Queue *q = malloc(sizeof *q);
if (q) {
*q = queue_init;
enqueue(q, root);
while (!is_empty_q(q)) {
Node* cur = front(q);
printf("%d ", cur->data);
if (cur->left != NULL)
enqueue(q, cur->left);
if (cur->right != NULL)
enqueue(q, cur->right);
dequeue(q);
}
}
}
}
void pre_order(Node* root) { //D<root>L<left>R<right> -- preorder (of root)
if (root) {
printf("%d ", root->data);
pre_order(root->left);
pre_order(root->right);
}
}
void in_order(Node* root) { //L<left>D<root>R<right> -- inorder -- gives sorted list
if (root) {
in_order(root->left);
printf("%d ", root->data);
in_order(root->right);
}
}
Node* in_order_suc(Node* root, int data) {
Node* cur = find(root, data);
if (!cur)
return cur;
if (cur->right != NULL) { //case 1: node has sub tree
return find_min(cur->right);
}
else { //case 2: no right sub tree
Node* suc = NULL, *prev = root;
while (prev != cur) {
if (cur->data < prev->data) {
suc = prev;
prev = prev->left;
}
else {
prev = prev->right;
}
}
return suc;
}
}
void post_order(Node* root) { //L<left>R<right>R<root> -- postorder
if (root) {
post_order(root->left);
post_order(root->right);
printf("%d ", root->data);
}
}
Node* clear(Node* root) {
while (root) {
root = delete(root, root->data);
}
return root;
}
int main() {
#define MAX 8
int n = MAX;
BST bst1 = bst_init;
Node* bst1_root = bst1.root;
int arr[MAX] = {15, 10, 20, 9, 13, 19, 22, 18};
if (!arr) return 0;
for (int i = 0; i < n; i++)
bst1_root = insert(bst1_root, arr[i]);
printf("height: %d \n", height(bst1_root));
printf("size: %d \n", size(bst1_root));
printf("depth of 13: %d \n", depth(bst1_root, 18));
printf("is_bst: %d \n", is_bst(bst1_root, -1000, 1000)); //assuming -1000 < data(bst) < 1000
printf("is_bst_balanced: %d \n", is_bst_balanced(bst1_root));
printf("min: %d \n", find_min(bst1_root)->data);
printf("max: %d \n", find_max(bst1_root)->data);
printf("element: %d found \n", find(bst1_root, 19)->data);
printf("level order ");
level_order(bst1_root);
printf("\n");
printf("preorder order ");
pre_order(bst1_root);
printf("\n");
printf("inorder order ");
in_order(bst1_root);
printf("\n");
printf("postorder order ");
post_order(bst1_root);
printf("\n");
printf("inorder successor of 9 is %d \n", in_order_suc(bst1_root, 9)->data);
bst1_root = delete(bst1_root, 18);
printf("in order ");
in_order(bst1_root);
printf("\n");
bst1_root = insert(bst1_root, 18);
printf("in order ");
in_order(bst1_root);
bst1_root = clear(bst1_root);
printf("\n");
if (!bst1_root)
printf("BST Cleared!");
return 0;
}
1 Answer 1
Fix compilation warnings
If your compiler isn't telling you about these, you haven't enabled enough warnings:
BST* create_bst(void) {
/*🔺🔺🔺*/
int main(void) {
/*🔺🔺🔺*/
int arr[MAX] = {15, 10, 20, 9, 13, 19, 22, 18};
//if (!arr) return 0;
assert(arr); /* local array cannot be a null pointer */
Fix the leak
Valgrind identifies memory we haven't freed:
==3037411== HEAP SUMMARY:
==3037411== in use at exit: 24 bytes in 1 blocks
==3037411== total heap usage: 19 allocs, 18 frees, 1,392 bytes allocated
==3037411=わ=わ
=わ=わ3037411=わ=わ 24 bytes in 1 blocks are definitely lost in loss record 1 of 1
==3037411== at 0x48407B4: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==3037411== by 0x109903: level_order (283676.c:258)
==3037411== by 0x109D81: main (283676.c:347)
Prefer functions to macros
#define max(x, y) (((x) > (y)) ? (x) : (y))
This is a dangerous macro, as it expands arguments multiple times, causing doubled side-effects. Prefer to write a function for each type it's used with, or simply inline it since it's used exactly once in this program.
Clients shouldn't need to know about nodes
Node
and QNode
are implementation details that shouldn't be in the public (non-static
) interface. They should be created and deleted automatically by the public function.
We'd find it much easier to follow if the code at least foresaw separate compilation and put the "header" content ahead of implementation.
Handle memory failure gracefully
We have a good check in create_node()
:
QNode* node = malloc(sizeof * node); if (!node) return node;
However, when we call it, we fail to account for the fact it can return a null pointer:
QNode* updated_head = create_node(elm); /* possibly null */ if (!q->head) { ⋮ } else { updated_head->next = q->head; /* BANG! */ ⋮ }
This is something you have been told about in a previous review, but seem not to have learnt from.
Similarly, the example code in main()
doesn't show us using the return value from insert()
to determine whether the operation was successful.
The queue seems backwards
It's naturally quite easy to add elements to the tail of a singly-linked list, and to remove them from the head. Doing it the other way around means that every dequeue()
calls tail->prev
to walk the entire length of the list. That's obviously much less efficient.
Tree operations don't have to be recursive
We seem to have the beginnings of iterative operation (assigning to root
rather than just performing a tail-call) in insert()
and find()
, but have failed to follow up on that.
Traversal functions are inflexible
level_order()
, pre_order()
, in_order()
and post_order()
just print the values encountered. Traversal functions should take a function pointer to permit other actions. We usually also accept a void*
which the function can use as state while it operates:
void in_order(Node* root, void(*func)(Node*,void*), void *func_data);
Think about modifiability
Functions that I'd expect to accept a const
tree, such as find()
, have mutable arguments. A good interface is much more explicit about which operations will modify the tree and which will not.
Tests should be self-checking
The demo program is interesting, but it would be much more useful if it actually tested the functionality and returned appropriate exit status for success or failure.
-
\$\begingroup\$ Thanks, the point about the inflexibility of traversal function is very true. I already fixed that memory leak. \$\endgroup\$V_head– V_head2023年03月03日 11:55:57 +00:00Commented Mar 3, 2023 at 11:55
-
2\$\begingroup\$ Toby, what standard and warnings are used that complain about
BST* create_bst()
? \$\endgroup\$chux– chux2023年03月03日 20:23:09 +00:00Commented Mar 3, 2023 at 20:23 -
3\$\begingroup\$ Toby, nice answer, yet too bad OP accepted it so soon. So many more review points exists. \$\endgroup\$chux– chux2023年03月03日 20:49:38 +00:00Commented Mar 3, 2023 at 20:49
-
1\$\begingroup\$ @chux, in my case the specific warning for both of those was
gcc -std=c17 -Wstrict-prototypes
. \$\endgroup\$Toby Speight– Toby Speight2023年03月08日 15:00:27 +00:00Commented Mar 8, 2023 at 15:00
typedef
ofBST
. Why isn't it used in what interface there is? \$\endgroup\$BST doesn't allow duplicates, no?
I don't know any authoritative definition of BST: up to you. You don't need/want duplicates in sets. When there is ("payload") data associated with keys, duplicates may be essential. \$\endgroup\$is enough to ...
write tests, have them executed automatically. This is not chat. \$\endgroup\$int arr[MAX] = {15, 10, 20, 9, 13, 19, 22, 18}; if (!arr) return 0;
===> doesn't make sense. The address ofarr
will always evaluate astrue
. \$\endgroup\$main()
. You did notfree()
the memory allocated bylevel_order()
. \$\endgroup\$