Minimum Spanning Tree using Prim's algorithm

Question 1

I have implemented a Minimum Spanning Tree using Prim's Algorithm. Could someone give some about some improvements for code structure, conventions, performance, etc?

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#define MAX_INT 512
struct q_node
{
 int val;
 struct q_node *next, *prev;
} q_node;
struct queue
{
 struct q_node *head, *tail;
} queue;
int graph[MAX_INT][MAX_INT] = {{0}};
int parent[MAX_INT] = {-1}, key[MAX_INT] = {-1}, adj[MAX_INT] = {-1};
int adj_top = 0;
struct queue *init_q()
{
 struct queue *q = malloc(sizeof(struct queue));
 q->head = NULL; q->tail = NULL;
 return q;
}
void free_q(struct queue **q_ptr)
{
 struct q_node *q_temp = (*q_ptr)->head, *q_temp2;
 while (q_temp) {
 q_temp2 = q_temp;
 q_temp = q_temp->next;
 free(q_temp2);
 }
 free(*q_ptr);
}
struct queue *enqueue(struct queue *q, int i)
{
 struct q_node *q_temp = malloc(sizeof(struct q_node));
 q_temp->prev = NULL;
 q_temp->next = NULL;
 q_temp->val = i;
 if (q->head == NULL && q->tail == NULL) {
 q->head = q_temp;
 q->tail = q_temp;
 } else {
 q_temp->prev = q->tail;
 q->tail->next = q_temp;
 q->tail = q_temp;
 }
 return q;
}
int is_empty(struct queue *q)
{
 return (q->head == NULL && q->tail == NULL) ? 1 : 0;
}
void traverse(struct queue *q)
{
 struct q_node *q_inst = q->head;
 if (q->head) {
 do {
 printf("%d ", q_inst->val);
 q_inst = q_inst->next;
 } while (q_inst != NULL);
 }
 printf("\n");
}
int extract_min(struct queue **q_ptr, int no_vert)
{
 struct queue *q = *q_ptr;
 struct q_node *q_inst = q->head, *q_temp = NULL;
 int i, min_val = MAX_INT, min_head;
 // i = q_inst->val;
 while (q_inst != NULL) {
 i = q_inst->val;
 if (key[i] < min_val) {
 min_val = key[i];
 min_head = i;
 q_temp = q_inst;
 }
 q_inst = q_inst->next;
 // i = q_inst->val;
 }
 if (q_temp == q->head && q_temp == q->tail) {
 q->head = NULL;
 q->tail = NULL;
 free(q_temp);
 } else if (q_temp == q->head && q_temp != NULL) {
 if (q_temp->next)
 q_temp->next->prev = NULL;
 (*q_ptr)->head = q_temp->next;
 free(q_temp);
 } else if (q_temp == q->tail && q_temp != NULL) {
 if (q_temp->prev)
 q_temp->prev->next = NULL;
 (*q_ptr)->tail = q_temp->prev;
 free(q_temp);
 } else {
 q_temp->prev->next = q_temp->next;
 q_temp->next->prev = q_temp->prev;
 free(q_temp);
 }
 return min_head;
}
int isin_q(struct queue *q, int v)
{
 struct q_node *q_inst = q->head;
 while (q_inst) {
 if (q_inst->val == v)
 return 1;
 q_inst = q_inst->next;
 }
 return 0;
}
void find_adj(int u, int no_vert)
{
 int i;
 for (i = 0; i < no_vert; i++) {
 if (graph[u][i] != 0) {
 adj[adj_top++] = i;
 }
 }
}
void min_spantree(int no_vert, int no_edge, int root)
{
 int u, v, i;
 struct queue *q = init_q();
 for (i = 0; i < no_vert; i++) {
 key[i] = MAX_INT;
 parent[i] = -1;
 }
 key[root] = 0;
 for (i = 0; i < no_vert; i++) {
 q = enqueue(q, i);
 }
 /* printf(" Key Parent\n");
 for (i = 0; i < no_vert; i++) {
 printf("%6d%6d\n", key[i], parent[i]);
 }*/
 while (!is_empty(q)) {
 // traverse(q);
 u = extract_min(&q, no_vert);
 // printf("U: %d\n", u);
 find_adj(u, no_vert);
 for (i = 0; i < adj_top; i++) {
 v = adj[i];
 if (isin_q(q, v) && graph[u][v] < key[v]) {
 parent[v] = u;
 key[v] = graph[u][v];
 }
 }
 adj_top = 0;
 }
 free_q(&q);
}
int main(int argc, char *argv[])
{
 int no_vert, no_edge, root_vert;
 int i, from, to;
 scanf("%d %d", &no_vert, &no_edge);
 scanf("%d", &root_vert);
 for (i = 0; i < no_edge; i++) {
 scanf("%d %d", &from, &to);
 scanf("%d", &graph[from][to]);
 graph[to][from] = graph[from][to];
 }
 min_spantree(no_vert, no_edge, root_vert);
 printf(" Node Parent key\n");
 // printf("%6d%6d\n", parent[0], key[0]);
 for (i = 0; i < no_vert; i++) {
 printf("%7d%7d%7d\n", i, parent[i], key[i]);
 }
 return 0;
}

Question 2

I see some things that might help you improve your code.

Add comments

Prim's algorithm is well known, and the names of your functions are generally well chosen, but there are no comments in the code to aid the reader. Just a few well-chosen comments would be useful.

In particular the format of the input is not at all documented, leaving the user to reverse engineer the format by reading the code.

Consider a more user-friendly input format

Right now, the user must specify the numbers of vertices and edges and must give numeric node numbers. It would be nice, and wouldn't complicate your code much at all, if one could use letter or word designators for the nodes and to have the computer automatically count the edges.

Perform input sanitation

The scanf function is handy for well-formatted input but not particularly robust for general user input. For instance, if the use enters a negative number for a node number, this code doesn't catch that fact and attempts to address an array with a negative index value. On my machine, this cause a segfault and a program crash.

This could be avoided by doing error checking and by validating the return value from scanf (which returns the number of items scanned) and by examining the value. All of the inputs should probably be unsigned (with the possible exception of the link values), so the code should probably use the %u format specifier instead of %d.

Eliminate unused variables

Unused variables are a sign of poor code quality, so eliminating them should be a priority. In this code, main uses neither argc nor argv and so that function should be int main(). My compiler also tells me that no_vert is unused in extract_min() and no_edge is unused in min_spantree(). Your compiler is probably also smart enough to tell you that, if you ask it to do so.

Eliminate unused functions

The traverse() function is not used. It appears that perhaps it was once used and then the only call commented out. Eliminating it makes the code easier to understand and maintain.

Eliminate unused headers

Nothing from the <limits.h> appears to actually have been used within this code, so it would be best to eliminate it.

Eliminate global variables where practical

The code declares and uses 5 global variables. Global variables obfuscate the actual dependencies within code and make maintenance and understanding of the code that much more difficult. It also makes the code harder to reuse. For all of these reasons, it's generally far preferable to eliminate global variables and to instead pass pointers to them. That way the linkage is explicit and may be altered more easily if needed. For example, to eliminate the adj_top global, the find_adj() function could be rewritten like this:

int find_adj(int u, int no_vert)
{
 int i;
 int adj_top = 0;
 for (i = 0; i < no_vert; i++) {
 if (graph[u][i] != 0) {
 adj[adj_top++] = i;
 }
 }
 return adj_top;
}

Within the context that it's called in min_spantree the call to it becomes:

int adj_top = find_adj(u, no_vert);
for (i = 0; i < adj_top; i++) { 
 ... 
}

Consider regularizing function names

The functions init_q, free_q, enqueue, is_empty, extract_min, isin_q, all deal with the same queue structure. In some places, as with init_q and free_q you have used a _q suffix which suggests this. I'd recommend extending and enhancing that practice and rename the functions q_init, q_free, q_push, q_is_empty, q_get_min, q_contains. By making the function names more regular, it's easy to see at a glance that any q_ function deals with a queue and the names are an attempt to concisely represent what the function does with the queue.

Check return values for `malloc`

The two places you use malloc in the code immediately use the return value as though it were a valid pointer. However, what if you're out of memory and the allocation failed? In such cases, malloc returns NULL and so the code would make a bad situation (out of memory) even worse by dereferencing NULL and causing a crash. Better is to simply add a check and abort if the return value is NULL before dereferencing.

Rethink your data structures

Prim's algorithm is designed to create a minimum spanning tree, but strangely enough, no tree structure is actually used within the code. That's not necessarily a problem, but it's worth thinking about. Also, the parent and key arrays are actually tightly coupled and one is never altered without the other. This strongly suggests that what is actually called for is an array of a struct containing both. Finally, the graph structure really only needs to be large enough to contain no_vert points. Dynamically, rather than statically allocating the size would reduce wasted space and clarify what it contains.

Eliminate redundant loops

Within min_spantree the code loops through all vertices and initializes the key and parent values. It then loops through the exact same nodes and adds them to the queue. It would make more sense to do everything in a single loop through since they are not dependent.

Consider using a `for` loop rather than `while` where appropriate

In the isin_q() routine, the code currently uses a while loop. It's not wrong as written, but when a loop has an initialization, a continuation condition and an iteration step, it might be more appropriately written as a for loop. As rewritten (and renamed as mentioned above) this would become:

int q_contains(struct queue *q, int v)
{
 struct q_node *q_inst;
 for (q_inst = q->head; q_inst; q_inst = q_inst->next)
 if (q_inst->val == v)
 return 1;
 }
 return 0;
}

Avoid using the same variable for two different things

The code currently defines and uses MAX_INT as both the size of various arrays and as a maximum possible link cost. These are actually two different meanings that are not necessarily related. For example, one could imagine that the graph array could be dynamically sized to the number of vertices and the maximum possible link cost could be some completely different value such as INT_MAX as defined in <limits.h>.

Question 3

You don't really gain anything by abbreviating your queue related functions. So name them init_queue, free_queue etc.
You are also being inconsistent with naming. Sometimes you have a suffix like q and sometimes you don't like in is_empty. You should use a common naming convention for your data structure.
Extract the data structure in to separate files. The queue implementation is independent of the algorithm and should be treated separately. A dequeue (double-ended-queue) is a generic enough structure to possibly re-use it in the future.
MAX_INT is a rather bad name for that constant. MAX_DIMENSIONS would have been better.
traverse should not do a printf - it should take a function pointer as callback which gets called on every node. This makes it more versatile and the user can chose what to do (dump it to stdout , write it to a file, send it over the network, etc.)
There could be some merit in checking the return value of malloc for NULL and abort() in that case. Your code will probably most likely crash as it stands (which is what you want in that case) but technically per C standard dereferencing a NULL pointer is undefined behavior and theoretically your system could do all kinds of stuff in that case.

Edward Edward 67.2k4 gold badges120 silver badges284 bronze badges · Accepted Answer · 2014-10-19 21:04:38Z

I see some things that might help you improve your code.

Add comments

Prim's algorithm is well known, and the names of your functions are generally well chosen, but there are no comments in the code to aid the reader. Just a few well-chosen comments would be useful.

In particular the format of the input is not at all documented, leaving the user to reverse engineer the format by reading the code.

Consider a more user-friendly input format

Right now, the user must specify the numbers of vertices and edges and must give numeric node numbers. It would be nice, and wouldn't complicate your code much at all, if one could use letter or word designators for the nodes and to have the computer automatically count the edges.

Perform input sanitation

The scanf function is handy for well-formatted input but not particularly robust for general user input. For instance, if the use enters a negative number for a node number, this code doesn't catch that fact and attempts to address an array with a negative index value. On my machine, this cause a segfault and a program crash.

This could be avoided by doing error checking and by validating the return value from scanf (which returns the number of items scanned) and by examining the value. All of the inputs should probably be unsigned (with the possible exception of the link values), so the code should probably use the %u format specifier instead of %d.

Eliminate unused variables

Unused variables are a sign of poor code quality, so eliminating them should be a priority. In this code, main uses neither argc nor argv and so that function should be int main(). My compiler also tells me that no_vert is unused in extract_min() and no_edge is unused in min_spantree(). Your compiler is probably also smart enough to tell you that, if you ask it to do so.

Eliminate unused functions

The traverse() function is not used. It appears that perhaps it was once used and then the only call commented out. Eliminating it makes the code easier to understand and maintain.

Eliminate unused headers

Nothing from the <limits.h> appears to actually have been used within this code, so it would be best to eliminate it.

Eliminate global variables where practical

The code declares and uses 5 global variables. Global variables obfuscate the actual dependencies within code and make maintenance and understanding of the code that much more difficult. It also makes the code harder to reuse. For all of these reasons, it's generally far preferable to eliminate global variables and to instead pass pointers to them. That way the linkage is explicit and may be altered more easily if needed. For example, to eliminate the adj_top global, the find_adj() function could be rewritten like this:

int find_adj(int u, int no_vert)
{
 int i;
 int adj_top = 0;
 for (i = 0; i < no_vert; i++) {
 if (graph[u][i] != 0) {
 adj[adj_top++] = i;
 }
 }
 return adj_top;
}

Within the context that it's called in min_spantree the call to it becomes:

int adj_top = find_adj(u, no_vert);
for (i = 0; i < adj_top; i++) { 
 ... 
}

Consider regularizing function names

The functions init_q, free_q, enqueue, is_empty, extract_min, isin_q, all deal with the same queue structure. In some places, as with init_q and free_q you have used a _q suffix which suggests this. I'd recommend extending and enhancing that practice and rename the functions q_init, q_free, q_push, q_is_empty, q_get_min, q_contains. By making the function names more regular, it's easy to see at a glance that any q_ function deals with a queue and the names are an attempt to concisely represent what the function does with the queue.

Check return values for `malloc`

The two places you use malloc in the code immediately use the return value as though it were a valid pointer. However, what if you're out of memory and the allocation failed? In such cases, malloc returns NULL and so the code would make a bad situation (out of memory) even worse by dereferencing NULL and causing a crash. Better is to simply add a check and abort if the return value is NULL before dereferencing.

Rethink your data structures

Prim's algorithm is designed to create a minimum spanning tree, but strangely enough, no tree structure is actually used within the code. That's not necessarily a problem, but it's worth thinking about. Also, the parent and key arrays are actually tightly coupled and one is never altered without the other. This strongly suggests that what is actually called for is an array of a struct containing both. Finally, the graph structure really only needs to be large enough to contain no_vert points. Dynamically, rather than statically allocating the size would reduce wasted space and clarify what it contains.

Eliminate redundant loops

Within min_spantree the code loops through all vertices and initializes the key and parent values. It then loops through the exact same nodes and adds them to the queue. It would make more sense to do everything in a single loop through since they are not dependent.

Consider using a `for` loop rather than `while` where appropriate

In the isin_q() routine, the code currently uses a while loop. It's not wrong as written, but when a loop has an initialization, a continuation condition and an iteration step, it might be more appropriately written as a for loop. As rewritten (and renamed as mentioned above) this would become:

int q_contains(struct queue *q, int v)
{
 struct q_node *q_inst;
 for (q_inst = q->head; q_inst; q_inst = q_inst->next)
 if (q_inst->val == v)
 return 1;
 }
 return 0;
}

Avoid using the same variable for two different things

The code currently defines and uses MAX_INT as both the size of various arrays and as a maximum possible link cost. These are actually two different meanings that are not necessarily related. For example, one could imagine that the graph array could be dynamically sized to the number of vertices and the maximum possible link cost could be some completely different value such as INT_MAX as defined in <limits.h>.

Stack Exchange Network

Minimum Spanning Tree using Prim's algorithm

2 Answers 2

Add comments

Consider a more user-friendly input format

Perform input sanitation

Eliminate unused variables

Eliminate unused functions

Eliminate unused headers

Eliminate global variables where practical

Consider regularizing function names

Check return values for `malloc`

Rethink your data structures

Eliminate redundant loops

Consider using a `for` loop rather than `while` where appropriate

Avoid using the same variable for two different things

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Minimum Spanning Tree using Prim's algorithm

2 Answers 2

Add comments

Consider a more user-friendly input format

Perform input sanitation

Eliminate unused variables

Eliminate unused functions

Eliminate unused headers

Eliminate global variables where practical

Consider regularizing function names

Check return values for malloc

Rethink your data structures

Eliminate redundant loops

Consider using a for loop rather than while where appropriate

Avoid using the same variable for two different things

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions

Check return values for `malloc`

Consider using a `for` loop rather than `while` where appropriate