Please review this doubly linked list in C. It has the basic linked list functions as you would expect plus some algorithms to perform on the list.
list.h header file:
/*
Double linked list. data represented as void*
*/
#ifndef LIST_H_
#define LIST_H_
#include <stdlib.h> // size_t def
// signature for compare / match function
typedef int(*list_compare) (const void* a, const void* b);
/* double linked list node representation
double links to next and previous node
data element represented as void* to support generic data */
typedef struct node {
struct node* next;
struct node* prev;
void* data;
} node_t;
/* list structure has following field:
size: number of nodes
destroy: user defined function for deleting dynamically allocated data element
first: head of list
last: tail of list */
typedef struct list {
size_t size;
void(*destroy)(void *data);
node_t* first;
node_t* last;
} list_t;
// Lifecycle and misc functions
/* Initialise list with zero nodes. destroy argument can be NULL or a
user defined function for custom deallocation of user supplied data
O(1) complexity */
list_t* list_init(void(*destroy)(void *data));
/* Deallocate memory used by list.
O(n) complexity - dependent on no. nodes remaining in list */
void list_free(list_t* list);
/* Returns 1 if empty, zero otherwise. O(1) complexity. */
int list_empty(list_t* list);
/* Returns size of list. O(1) complexity. */
size_t list_size(list_t* list);
// Iterators
/* Returns head of list. O(1) complexity. */
node_t* list_first(list_t* list);
/* Returns tail of list. O(1) complexity. */
node_t* list_last(list_t* list);
/* Returns node_t* after node_t* p in list. O(n) complexity worst case. */
node_t* list_next(node_t* p);
/* Returns node_t* prior to node_t* p in list. O(n) complexity worst case. */
node_t* list_previous(node_t* p);
// Setters and getters
/* Inserts element at head of list. O(1) complexity. */
void list_push_front(list_t* list, void* element);
/* Inserts element at tail of list. O(1) complexity. */
void list_push_back(list_t* list, void* element);
/* Returns data item in node at head of list. node erased. O(1) complexity. */
void* list_pop_front(list_t* list);
/* Returns data item in node at tail of list. node erased. O(1) complexity. */
void* list_pop_back(list_t* list);
/* Returns data item in node at head of list. Containing node retained.
O(1) complexity. */
void* list_top_front(list_t* list);
/* Returns data item in node at tail of list. Containing node retained.
O(1) complexity. */
void* list_top_back(list_t* list);
/* Removes node p and returns next node. O(n) complexity. */
node_t* list_remove(list_t* list, node_t* p);
/* Inserts before p and returns node pointing to data. O(n) complexity. */
node_t* list_insert(list_t* list, node_t* p, void* data);
// Algorithms on list
/* Find first item with data in list. Arguments:
list - list to search
data - data item to find
cmp - comparison function - use ordering function like memcmp
Returns found node or NULL if not found.
O(n) complexity. */
node_t* list_find(list_t* list, void* data, list_compare cmp);
/* Sort list (using mergesort). Arguments:
list - list to sort
cmp - comparison function - use ordering function like memcmp
Returns sorted list (actually will be same as list after function ends).
O(n log(n)) complexity. */
list_t* list_sort(list_t* list, list_compare cmp);
/* Reverse elements in list. Returns reversed list. O(n) complexity. */
list_t* list_reverse(list_t* list);
/* Insert list elements from list2 into list1 after node pos. list2 is
invalidated after the splice - DO NOT call list_free on list2 after splicing.
Arguments:
list1 - list to splice into
list2 - source list where nodes are moved into list1 after node pos
pos - list2 nodes are inserted after node pos. Can use NULL which inserts
list2 nodes after list1 tail node.
returns spliced list.
O(1) complexity - nodes are not copied. */
list_t* list_splice(list_t* list1, list_t* list2, node_t* pos);
/* Removes all consecutive duplicate elements from the list. Only the first
element in each group of equal elements is left.
Note that list should be sorted in order for remaining elements to be unique because
comparison is of consecutive elements.
Arguments:
list - list to remove duplicate elements
cmp - comparison function - use ordering function like memcmp
returns de-duplicated list.
O(n) complexity. */
list_t* list_unique(list_t* list, list_compare cmp);
#endif // LIST_H_
list.c implementation file:
#include "list.h"
/* helper functions */
// create a new node
static node_t* make_node(void* element) {
node_t* newnode = calloc(1, sizeof(node_t));
newnode->data = element;
return newnode;
}
// split uses tortoise and hare going at different speeds to find middle node
static node_t* split(node_t* p) {
node_t* tortoise = p;
node_t* hare = p;
while (hare->next && hare->next->next) {
tortoise = tortoise->next;
hare = hare->next->next;
}
node_t* middle = tortoise->next;
// we now want 2 lists from original
// stop list at start going into 2nd list
tortoise->next = NULL;
return middle;
}
// merge nodes based on comparison function
static node_t* merge(node_t* left, node_t* right, list_compare cmp) {
if (!left)
return right;
if (!right)
return left;
// arbitrarily choose left if they are the same
if (cmp(left->data, right->data) <= 0) {
left->next = merge(left->next, right, cmp);
left->next->prev = left;
left->prev = NULL;
return left;
}
else {
right->next = merge(left, right->next, cmp);
right->next->prev = right;
right->prev = NULL;
return right;
}
}
// mergesort algorithm on list
static node_t* mergesort(node_t* head, list_compare cmp) {
if (!head || !head->next)
return head;
node_t* left = head;
node_t* right = split(head);
left = mergesort(left, cmp);
right = mergesort(right, cmp);
return merge(left, right, cmp);
}
// swap data item in nodes
static void swap_data(node_t* n1, node_t* n2) {
void* tmp = n1->data;
n1->data = n2->data;
n2->data = tmp;
}
// append list2 onto tail of list1
static list_t* append(list_t* list1, list_t* list2) {
node_t* last1 = list1->last;
node_t* first2 = list2->first;
last1->next = list2->first;
first2->prev = last1;
list1->last = list2->last;
return list1;
}
list_t* list_init(void(*destroy)(void *data)) {
list_t* ll = calloc(1, sizeof(list_t));
ll->destroy = destroy;
return ll;
}
void list_free(list_t* list) {
node_t* it = list->first;
while (it) {
node_t* tmp = it;
it = it->next;
if (list->destroy) {
list->destroy(tmp);
}
free(tmp);
}
free(list);
}
int list_empty(list_t* list) {
return list->size == 0;
}
size_t list_size(list_t* list) {
return list->size;
}
node_t* list_first(list_t* list) {
return list->first ? list->first : NULL;
}
node_t* list_last(list_t* list) {
return list->last ? list->last : NULL;
}
node_t* list_next(node_t* p) {
return p ? p->next : NULL;
}
node_t* list_previous(node_t* p) {
return p ? p->prev : NULL;
}
void list_push_front(list_t* list, void* element) {
node_t* newnode = make_node(element);
if (list->first == NULL) {
list->first = newnode;
list->last = newnode;
}
else {
node_t* prevfirst = list->first;
list->first = newnode;
list->first->next = prevfirst;
prevfirst->prev = newnode;
}
list->size++;
}
void list_push_back(list_t* list, void* element) {
node_t* newnode = make_node(element);
if (list->first == NULL) {
list->first = newnode;
list->last = newnode;
}
else {
node_t* prevlast = list->last;
list->last = newnode;
list->last->prev = prevlast;
prevlast->next = newnode;
}
list->size++;
}
void* list_pop_front(list_t* list) {
if (list->first == NULL) {
return NULL;
}
node_t* top = list->first;
void* data = top->data;
list->first = list->first->next;
free(top);
list->size--;
if (list_empty(list)) {
list->first = NULL;
list->last = NULL;
}
else {
list->first->prev = NULL;
}
return data;
}
void* list_top_front(list_t* list) {
if (list->first == NULL) {
return NULL;
}
return list->first->data;
}
void* list_pop_back(list_t* list) {
if (list->first == NULL) {
return NULL;
}
node_t* top = list->last;
void* data = top->data;
list->last = list->last->prev;
free(top);
list->size--;
if (list_empty(list)) {
list->first = NULL;
list->last = NULL;
}
else {
list->last->next = NULL;
}
return data;
}
void* list_top_back(list_t* list) {
if (list->first == NULL) {
return NULL;
}
return list->last->data;
}
node_t* list_remove(list_t* list, node_t* p) {
if (list_empty(list) || !p)
return NULL;
for (node_t* it = list->first; it != NULL; it = it->next) {
if (p == it) {
// we have found node to remove
list->size--;
// 4 cases - only node, start, end, middle
// if only node in list
if (!it->prev && !it->next) {
if (list->destroy) {
list->destroy(it->data);
}
free(it);
list->first = list->last = NULL;
return NULL;
}
// else start
else if (!it->next) {
it->prev->next = NULL; // because we are deleting it
if (list->destroy) {
list->destroy(it->data);
}
free(it);
return NULL;
}
// else at end
else if (!it->prev) {
node_t* next = it->next;
next->prev = NULL;
if (list->destroy) {
list->destroy(it->data);
}
free(it);
list->first = next;
return next;
}
// else somewhere in middle
else {
// we have a previous and a next
node_t* next = it->next;
node_t* prior = it->prev;
next->prev = prior;
prior->next = next;
if (list->destroy) {
list->destroy(it->data);
}
free(it);
return next;
}
}
}
return NULL; // node to remove not found
}
node_t* list_insert(list_t* list, node_t* p, void* data) {
// if get to here we didn't find p - if NULL just insert into first
if (p == NULL) {
list_push_back(list, data);
return list->last;
}
for (node_t* it = list->first; it != NULL; it = it->next) {
if (p == it) {
// we have found node to insert before
if (!it->prev) {
// insert at head
list_push_front(list, data);
return list->first;
}
else {
node_t* newnode = make_node(data);
it->prev->next = newnode;
newnode->prev = it->prev;
newnode->next = it;
it->prev = newnode;
list->size++;
return newnode;
}
}
}
// if get to here we didn't find p - if NULL just insert at end of list
list_push_back(list, data);
return list->last;
}
list_t* list_sort(list_t* list, list_compare cmp) {
if (list_size(list) <= 1)
return list;
node_t* n = mergesort(list->first, cmp);
list->first = n;
// need to re-assign list->last - seek to end of list to find new last element
node_t* it = list->first;
while (it && it->next) {
it = it->next;
}
list->last = it;
return list;
}
node_t* list_find(list_t* list, void* data, list_compare cmp) {
for (node_t* it = list->first; it != NULL; it = it->next) {
if (cmp(it->data, data) == 0) {
return it;
}
}
return NULL;
}
list_t* list_reverse(list_t* list) {
node_t* fwd = list->first;
node_t* bck = list->last;
while (fwd && bck && fwd != bck && fwd != bck->next) {
swap_data(fwd, bck);
fwd = fwd->next;
bck = bck->prev;
}
return list;
}
// returns list1 after splice. list2 becomes invalidated after splice
list_t* list_splice(list_t* list1, list_t* list2, node_t* pos) {
list1->size += list2->size;
// special case pos null, append list2 on end of list1
if (pos == NULL) {
list1 = append(list1, list2);
free(list2);
return list1;
}
// find pos in list1
int found = 0;
for (node_t* it = list1->first; it != NULL; it = it->next) {
if (it == pos) {
found = 1;
node_t* next = it->next;
it->next = list2->first;
if (next) {
node_t* nextnext = next->next;
it->next = list2->last;
it->next->prev = list1->last;
nextnext = it;
free(list2);
return list1;
}
else {
// pos must have been list1->last
list1 = append(list1, list2);
free(list2);
return list1;
}
}
}
// do same as if pos = NUL
if (!found) {
list1 = append(list1, list2);
free(list2);
return list1;
}
return list1;
}
// caller must sort first
list_t* list_unique(list_t* list, list_compare cmp) {
void* previous = NULL;
for (node_t* it = list->first; it != NULL; it = it->next) {
if (previous && cmp(previous, it->data) == 0) {
// we delete this node
it = list_remove(list, it); // returns next node
// skip back one - otherwise for loop will skip a node
it = it->prev;
}
previous = it->data;
}
return list;
}
main.c driver program:
#include "list.h"
#include <stdio.h>
int mycomp(const void* a, const void* b) {
const int* ia = a;
const int* ib = b;
if (*ia > *ib) return 1;
if (*ib > *ia) return -1;
return 0;
}
int main() {
list_t* ll = list_init(NULL); // using ints stored on stack so no need for user defined destroy function.
int el1 = 1;
printf("push front %d\n", el1);
list_push_front(ll, &el1);
int el2 = 2;
printf("push front %d\n", el2);
list_push_front(ll, &el2);
int el3 = 3;
printf("push front %d\n", el3);
list_push_front(ll, &el3);
int el4 = 4;
int el5 = 5;
int el6 = 6;
printf("push back %d\n", el4);
list_push_back(ll, &el4);
printf("push back %d\n", el5);
list_push_back(ll, &el5);
printf("push back %d\n", el6);
list_push_back(ll, &el6);
printf("size of list now: %d\n", list_size(ll));
if (list_find(ll, &el5, mycomp) != NULL) {
printf("item %d found in list\n", el5);
}
else {
printf("item %d not found in list\n", el5);
}
int* rettop = list_top_back(ll);
printf("top back = %d\n", *rettop);
rettop = list_top_front(ll);
printf("top front = %d\n", *rettop);
for (node_t* it = list_first(ll); it != NULL; it = it->next) {
const int* p = it->data;
printf("ptr=%p, data=%d\n", it, *p);
}
int el7 = 7;
list_insert(ll, list_last(ll), &el7);
printf("after inserting 7 just before the last element\n");
for (node_t* it = list_first(ll); it != NULL; it = it->next) {
const int* p = it->data;
printf("ptr=%p, data=%d\n", it, *p);
}
printf("remove each node in list\n");
node_t* current = list_first(ll);
while (current) {
printf("about to remove %p, data=%d\n", current, *(int*)current->data);
current = list_remove(ll, current);
}
// now regenerate list
int selection[] = { 85, 57, 44, 4, 24, 96, 30, 93, 96, 64 };
int size = sizeof(selection) / sizeof(selection[0]);
node_t* next = list_insert(ll, list_first(ll), &selection[0]);
for (int i = 1; i < size; ++i) {
next = list_insert(ll, next, &selection[i]);
}
printf("linked list now looks like this:\n");
for (node_t* it = list_first(ll); it != NULL; it = it->next) {
const int* p = it->data;
printf("ptr=%p, data=%d\n", it, *p);
}
if (list_find(ll, &selection[7], mycomp) != NULL) {
printf("item %d found in list\n", selection[7]);
}
else {
printf("item %d not found in list\n", selection[7]);
}
int f = 108;
if (list_find(ll, &f, mycomp) != NULL) {
printf("item %d found in list\n", f);
}
else {
printf("item %d not found in list\n", f);
}
list_sort(ll, mycomp);
printf("linked list after sort now looks like this:\n");
for (node_t* it = list_first(ll); it != NULL; it = it->next) {
const int* p = it->data;
printf("ptr=%p, data=%d\n", it, *p);
if (it->next && *p > *((const int*)it->next->data)) {
printf("sort failed: %d > %d (next data item)\n", *p, *((const int*)it->next->data));
}
}
ll = list_reverse(ll);
printf("linked list after list_reverse now looks like this:\n");
for (node_t* it = list_first(ll); it != NULL; it = it->next) {
const int* p = it->data;
printf("ptr=%p, data=%d\n", it, *p);
}
// test unique
ll = list_unique(ll, mycomp);
printf("linked list after list_unique now looks like this:\n");
for (node_t* it = list_first(ll); it != NULL; it = it->next) {
const int* p = it->data;
printf("ptr=%p, data=%d\n", it, *p);
}
list_t* ll2 = list_init(NULL);
int el8 = 8;
int el9 = 9;
int el10 = 10;
printf("push back %d on ll2\n", el8);
list_push_back(ll2, &el8);
printf("push back %d on ll2\n", el9);
list_push_back(ll2, &el9);
printf("push back %d on ll2\n", el10);
list_push_back(ll2, &el10);
ll = list_splice(ll, ll2, NULL);
printf("linked list 1 after splicing ll2 on end:\n");
for (node_t* it = list_first(ll); it != NULL; it = it->next) {
const int* p = it->data;
printf("ptr=%p, data=%d\n", it, *p);
}
int* ret1 = list_pop_front(ll);
printf("pop front %d\n", *ret1);
int* ret2 = list_pop_front(ll);
printf("pop front %d\n", *ret2);
int* ret3 = list_pop_front(ll);
printf("pop front %d\n", *ret3);
printf("list size now = %d\n", list_size(ll));
list_free(ll);
// do not free a list_t spliced onto another list
// ie do not call list_free(ll2);
}
5 Answers 5
Testing for
list->destroy
is done too many times. Better do it in thelist_init
:if (!destroy) { destroy = dummy_destroy; } list->destroy = destroy;
with
static void dummy_destroy(void * data) {}
list_splice
is unnecessarily verbose.- It repeats what
list_find
is for. found
is unnecessary: whenif(!found)
line is reached it is guaranteed thatp
was not there (otherwise one of the early returns was executed).The sequence
free(list2); return list1;
is repeated too many times.
The function always returns
list1
, and therefore the return value conveys no information to the caller. I'd rather make itvoid
or figure out something useful to return.As a side note, I disagree with the design decision to treat a non-existent
pos
as a signal to append. In such situation it seems more logical to fail.
All that said, consider
list_splice(list_t * list1, list_t * list2, node_t * pos) { pos = list_find(list1, pos); if (!pos) { return NULL; } list2->last->next = pos->next; pos->next->prev = list2->last; pos->next = list2->first; list2->first->prev = pos; free(list2); return list1; }
Same applies to
list_insert
andlist_remove
.- It repeats what
The
found
clause inlist_remove
can be streamlined:if (!pos->prev) { list->first = pos->next; } else { pos->prev->next = pos->next; } if (!pos->next) { list->last = pos->prev; } else { pos->next->prev = pos->prev; }
When popping the last element from the top,
list->first
automagically becomesNULL
after thelist->first = list->first->next;
assignment. An explicitlist->first = NULL
is redundant. You may want to convert it into an assertion:if (list_empty(list)) { assert(list->first == NULL); list->last = NULL; }
Same applies to
list_pop_back
.The
list_push_front
can be streamlined (notice that certain things shall happen no matter what):newnode->next = list->first; if (!list->first) { list->last = newnode; } else { list->first->prev = newnode; } list->first = newnode;
Same applies to
list_push_back
.return list->first ? list->first : NULL;
is a long way to sayreturn list->first;
As a general note, I am not sure that exposing
node_t
to a client is a good idea.
My general observations:
I can understand your code and it's apparent intent. I'd normally ask for a few more comments around the if statements to make sure a future maintainer doesn't get confused
Because you are using pointers, you have the 'destroy' function to be able to delete entries from your list. But also, because you are using pointers, you are requiring the using code to ensure that it has not taken a copy of a pointer that might ever be 'destroyed'. Unless you are assuming some kind of reference counting system within the 'destroy' function or require that a copy of an object pointer is never made then you are likely to get 'undefined behaviour'
A possible alternative is for you to return a filtered list of elements that have been removed as a result of an API call. e.g.
resultThing OperationThatRemovesFromList( list_t* myList, list_t* appendWithDiscards);
The calling code would then be responsible for destroying it's own objects when it knows it is safe to do so. (This is akin to manual garbage collection)
mallocing and freeing your list_t objects is fine since your library always has ownership of them anyway
To prevent code from assuming and using/manipulating your internal structure, you could hide the details of your list_t by having a public and private variant of them. The public header file declares no meaningful internal details and is just a byte array of the same size as the real private list_t. e.g.
struct public_list_t { char[LIST_ELEMENT_SIZE] private; }
where LIST_ELEMENT_SIZE is a macro that evaluates to the correct size depending on the size of pointers and size_t types on your system
- Unit testing: Your main function example is almost the outlines of a unit test; I would suggest that you expand it to act as a unit test of your library. After each operation, do an assert about the properties of your counted list. (I am hoping that an assert will yield a non zero exit code that can be used to perform automated unit testing on every build).
list_free(ll);
Note that after calling this line, ll
is not null, and actually a dangling pointer. Solutions:
Make this funtion return a null:
ll = list_free(ll);
or make "ll" a double pointer, and making it null form the function itself.
-
\$\begingroup\$ An alternate syntax (if you wish to use it) for freeing and nulling list references is to pass it in by reference; i.e. list_free(&ll). The calling code then doesn't have the option of not re-assigning the list to null. It won't stop them working around this by copying. \$\endgroup\$RidiculousRichard– RidiculousRichard2018年03月06日 07:54:00 +00:00Commented Mar 6, 2018 at 7:54
-
\$\begingroup\$ I don't like using references. From simple code inspections you don't know what are the effects of modifying a variable. Using pointers you know that this will be carried outside your scope. \$\endgroup\$elcuco– elcuco2018年03月12日 09:33:05 +00:00Commented Mar 12, 2018 at 9:33
I know that the code review is supposed to simply improve the code and what I'm about to suggest will probably require to rewrite most of it, however I would still like to share an approach to implementing doubly-linked list that I found particularity elegant. And the mind-blowing thing to realize about it is that most of the painstaking if-else branching can be avoided by simply introducing an additional "end" node instead of "first" and "last" pointers.
typedef struct node {
struct node *next;
struct node *prev;
void *data;
} node_t;
typedef struct list {
size_t size;
node_t end;
} list_t;
list_t *list_create()
{
list_t *list = malloc(sizeof(list_t));
list->size = 0;
list->end.next = list->end.prev = &list->end;
return list;
}
node_t *list_insert(list_t *list, node_t *next, void *data)
{
if (!next)
next = &list->end;
node_t *node = malloc(sizeof(node_t));
node->data = data;
node->next = next;
node->prev = next->prev;
node->next->prev = node->prev->next = node;
++list->size;
return node;
}
node_t *list_remove(list_t *list, node_t *node)
{
node_t *next = node->next;
node->next->prev = node->prev;
node->prev->next = node->next;
free(node);
--list->size;
return next;
}
Obviously the above functions require adding some safety checks and the rest of the functions can be implemented based on those once you get your head around this approach.
-
\$\begingroup\$ Have a look at Simula's mid-sixties classes SimSet and Link(, Linkage and Head). \$\endgroup\$greybeard– greybeard2018年03月05日 06:36:03 +00:00Commented Mar 5, 2018 at 6:36
-
\$\begingroup\$ This presents uncommented code - don't. \$\endgroup\$greybeard– greybeard2018年03月05日 06:37:18 +00:00Commented Mar 5, 2018 at 6:37
-
\$\begingroup\$ @r3mus n0x - like C++ std lib end? Interesting. \$\endgroup\$arcomber– arcomber2018年03月05日 10:45:27 +00:00Commented Mar 5, 2018 at 10:45
-
\$\begingroup\$ I am going to play around with this idea but don't you need a begin in list_t? How would you push given you may not have a node_t* ? \$\endgroup\$arcomber– arcomber2018年03月06日 12:53:57 +00:00Commented Mar 6, 2018 at 12:53
-
\$\begingroup\$ You use
list_insert(list, NULL, data)
. Since the second parameter specifies the next node, if you pass NULL it just uses the "end" node as the next node. \$\endgroup\$r3mus n0x– r3mus n0x2018年03月06日 13:22:38 +00:00Commented Mar 6, 2018 at 13:22
Compare Function Type
list_find(), list_sort(), list_uquiue()
are compare functions of the below type - much like those used in qsort()
.
typedef int(*list_compare) (const void* a, const void* b);
I have found that a compare function is more useful with a context pointer.
To do so, change the function pointer signature and add a parameter to the 3 functions.
typedef int(*list_compare) (void *context, const void* a, const void* b);
const
bool
int list_empty(list_t* list);
is a good candidate for bool
and const
. const
useful in other to take advantage and indicate list invariability.
// int list_empty(list_t* list);
bool list_empty(const list_t* list);
// size_t list_size(list_t* list);
size_t list_size(const list_t* list);
// node_t* list_first(list_t* list);
node_t* list_first(const list_t* list);
Overall I liked this code and picked up some ideas.
The commnet O(n) complexity worst case.
for slow functions is a god idea.
The one design criteria I did not favor was using and exposing the node_t
. IMO, better to hide.
At a minimum, the name should be prefixed list_
to restrict the name space impact.