I have played recently and implemented doubly linked list with merge sort algorithm. I was wondering if there is any way of improving the algorithm further to make it as fast as possible (I know about implementing list as array of void*
pointers but I want to implement that as a separate structure).
I've gone from something like merge sort in learn C the hard way and added list_split function to split list in half and improved merging so it only links lists together without calling list_push/list_pop procedure which uselessly call calloc and free. That way I was able to get from ~3.3s to 0.9s needed for sorting 1,000,000 integers in list.
Also any comments about style and hints for possible bugs are welcome.
src/dllist.c:
#include <stdlib.h>
#include <assert.h>
#include "dllist.h"
struct node {
struct node *next;
struct node *prev;
void *data;
};
struct list {
unsigned int count;
struct node *first;
struct node *last;
};
List list_create()
{
List l;
l = calloc(1, sizeof(*l));
assert(l);
return l;
}
void list_destroy(List l)
{
free(l);
}
static inline void list_bond(struct node *first, struct node *second)
{
first->next = second;
second->prev = first;
}
void list_push_first(List l, void *data)
{
struct node *n;
n = calloc(1, sizeof(*n));
assert(n);
n->data = data;
if(l->count > 1) {
list_bond(n,l->first);
l->first = n;
} else {
l->first = n;
l->last = n;
}
l->count++;
}
void list_push_last(List l, void *data)
{
struct node *n;
n = calloc(1, sizeof(*n));
assert(n);
n->data = data;
if(l->count > 1) {
list_bond(l->last, n);
l->last = n;
} else {
l->first = n;
l->last = n;
}
l->count++;
}
/* if count == 0 return NULL! */
static struct node *list_find_nth(List l, unsigned int c)
{
struct node *tmp;
unsigned int i;
assert(c <= l->count+1);
if(l->count == 0)
return NULL;
if(c<=l->count/2) {
tmp = l->first;
for(i = 1; i<c; i++) {
tmp = tmp->next;
}
} else {
tmp = l->last;
for(i = l->count; i>=c; i--) {
tmp = tmp->prev;
}
}
return tmp;
}
void list_append_nth(List l, void *data, unsigned int c)
{
struct node *n;
struct node *tmp;
n = calloc(1, sizeof(*n));
assert(n);
n->data = data;
assert(c <= l->count+1);
tmp = list_find_nth(l, c);
if(tmp != NULL) {
n->prev = tmp;
n->next = tmp->next;
tmp->next = n;
if(n->next != NULL)
n->next->prev = n;
} else {
l->first = n;
l->last = n;
}
l->count++;
}
void *list_pop_first(List l)
{
void *data = NULL;
struct node *tmp;
if(l->count != 0) {
tmp = l->first;
data = tmp->data;
if (l->count == 1) {
l->first = NULL;
l->last = NULL;
} else {
l->first = l->first->next;
l->first->prev = NULL;
}
free(tmp);
l->count--;
}
return data;
}
void *list_pop_last(List l)
{
void *data = NULL;
struct node *tmp;
if(l->count != 0) {
tmp = l->last;
data = tmp->data;
if(l->count > 1) {
l->last = l->last->prev;
l->last->next = NULL;
} else {
l->last = NULL;
l->first = NULL;
}
free(tmp);
l->count--;
}
return data;
}
void *list_pop_nth(List l, unsigned int c)
{
void *data = NULL;
struct node *tmp;
if(l->count != 0) {
tmp = list_find_nth(l, c);
data = tmp->data;
if(l->count > 1) {
tmp->prev->next = tmp->next;
tmp->next->prev = tmp->prev;
} else {
l->last = NULL;
l->first = NULL;
}
free(tmp);
l->count--;
}
return data;
}
/* split lists in half */
static void list_split(List left, List right)
{
struct node *n;
if(left->count > 2) {
n = list_find_nth(left, left->count/2);
right->first = n->next;
right->last = left->last;
right->count = left->count - left->count/2;
left->count = left->count/2;
left->last = n;
n->next->prev = NULL;
n->next = NULL;
} else {
right->last = left->last;
right->first = left->last;
left->last = left->first;
left->last->next = NULL;
right->first->prev = NULL;
left->count = 1;
right->count = 1;
}
}
/* print data in list */
void list_print(List l, print f)
{
unsigned int i;
struct node *tmp;
if(l->count != 0) {
tmp = l->first;
for(i = 0;i<l->count;i++)
{
f(tmp->data);
tmp = tmp->next;
}
}
}
/*
* DO NOT DELETE (yet)
* for debugging info about list
*/
void list_debug(List l, print f)
{
struct node *tmp;
int i;
i = 0;
tmp = l->first;
printf("List: count %d, first %d, last %d\n", l->count, l->first, l->last);
while (tmp!= NULL) {
printf("%d. node. Data: ",i);
f(tmp->data);
printf("at address %d. Next: %d Prev: %d \n",tmp,tmp->next,tmp->prev);
tmp = tmp->next;
i++;
}
printf("\n");
}
static inline List list_merge(List left, List right, List_compare cmp)
{
struct node *tmp_right;
struct node *tmp_left;
List result;
void *val = NULL;
result = list_create();
/* get first element (smallest or biggest debending on cmp) of two lists and add it to result */
if (cmp(left->first->data, right->first->data) <= 0) {
result->first = left->first;
result->last = left->first;
tmp_right = result->first->next;
tmp_left = right->first;
} else {
result->first = right->first;
result->last = right->first;
tmp_right = result->first->next;
tmp_left = left->first;
}
/* Run through left and right lists to append another nodes to result till one of them is empty */
while(tmp_right != NULL && tmp_left != NULL) {
if(cmp(tmp_right->data, tmp_left->data) <= 0) {
list_bond(result->last, tmp_right);
result->last = tmp_right;
tmp_right = tmp_right->next;
} else {
list_bond(result->last, tmp_left);
result->last = tmp_left;
tmp_left = tmp_left->next;
}
}
/* append the remaining nodes of one of the lists */
if(tmp_right == NULL) {
list_bond(result->last, tmp_left);
result->last = left->last;
}
if(tmp_left == NULL) {
list_bond(result->last, tmp_right);
result->last = right->last;
}
result->count = left->count +right->count;
list_destroy(left);
list_destroy(right);
return result;
}
List list_merge_sort(List left, List_compare cmp)
{
List right, sort_left, sort_right;
if(left->count <= 1) {
return left;
}
right = list_create();
list_split(left,right);
sort_left = list_merge_sort(left, cmp);
sort_right = list_merge_sort(right, cmp);
return list_merge(sort_left, sort_right, cmp);
}
src/dllist.h
#ifndef dllist_h
#define dllist_h
/* basic list type */
typedef struct list *List;
typedef int (*List_compare)(const void *a, const void *b);
typedef void (print)(void *);
/* function prototypes generated by cproto */
#include "dllist.p"
#endif
src/test_list.c
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <values.h>
#include <time.h>
#include "dllist.h"
#define TEST_SIZE (1000000)
int compare(const void *a, const void *b)
{
return (*(const int *)a > *(const int *)b);
}
void printnum(void *a)
{
printf("%d ",*(int *)a);
}
int
main(int argc, char **argv)
{
List l;
int *data;
int i;
int a;
int *r;
time_t t;
(void)argc;
(void)argv;
srand((unsigned) time(&t));
data = calloc(TEST_SIZE, sizeof(*data));
a = rand()%1000;
for(i = 0;i<TEST_SIZE;i++)
{
data[i] = rand()%1000;
}
l = list_create();
for(i = TEST_SIZE/2-1;i>=0;i--)
{
list_push_first(l,&data[i]);
}
for(i = TEST_SIZE/2;i<TEST_SIZE;i++)
{
list_push_last(l,&data[i]);
}
l = list_merge_sort(l, &compare);
for(i = 0;i<TEST_SIZE;i++)
{
r = list_pop_first(l);
}
list_destroy(l);
free(data);
return 0;
}
src/Makefile
# variables. SOURCES is exported from makefile one folder up and so is DEBUG
OBJECTS=$(SOURCES:.c=.o)
PROTO=$(SOURCES:.c=.p)
STATPROTO=$(SOURCES:.c=.sp)
DEPS=$(SOURCES:.c=.d)
# Generate and include dependencies:
all: $(DEPS) $(OBJECTS)
%.o: %.c %.d
$(CC) $(WARNINGS) $(DEBUG) $(COMPILER) -c -o $@ $<
%.d: %.c
$(CC) -MM -MG -MF $@ $<
# Generate function prototypes
%.p: %.c
cproto $< > $@
%.sp: %.c
cproto -S $< > $@
.PHONY: clean
clean:
rm -f $(OBJECTS) $(DEPS) $(PROTO) $(STATPROTO)
-include $(DEPS)
and Makefile
# IMPORTANT variables
PROGRAMNAME=Game
SOURCES=test_list.c dllist.c
#locations
SRCDIR= src
OUTDIR= bin
TESTDIR=tests
OBJECTS= $(addprefix $(SRCDIR)/, $(SOURCES:.c=.o))
PROGRAM= $(addprefix $(OUTDIR)/, $(PROGRAMNAME))
WARNINGS= -W -Wall -ansi -Wextra -pedantic -Wstrict-overflow=5 -Wshadow -Wpointer-arith -Wcast-qual -Wstrict-prototypes # turn on all possible warnings
COMPILER= -std=gnu89 -s -Os -Ofast -ffunction-sections -fdata-sections # strip, optimize for size and for performance. After that place all functions and data to separate sections
LINKER= -Wl,-Map=$(PROGRAM).map,--cref,--gc-section -Wl,--build-id=none # and with linker delete unneeded ones
DEBUG= -g
export SOURCES
export DEBUG
export COMPILER
export WARNINGS
all: $(PROGRAM)
.PHONY: objects clean
objects:
$(MAKE) -C $(SRCDIR) all
$(PROGRAM): objects
cc -o $@ $(OBJECTS) $(DEBUG) $(LINKER)
strip -S --strip-all --remove-section=.note.gnu.gold-version --remove-section=.comment --remove-section=.note --remove-section=.note.gnu.build-id --remove-section=.note.ABI-tag $@
clean:
$(MAKE) -C $(SRCDIR) clean
$(MAKE) -C $(TESTDIR) clean
rm -f $(PROGRAM) $(PROGRAM).map
-
\$\begingroup\$ You might want to remove all the commented out lines of code. It's obviously for debugging. You probably should take the tour as well. \$\endgroup\$pacmaninbw– pacmaninbw ♦2016年04月22日 12:15:58 +00:00Commented Apr 22, 2016 at 12:15
-
\$\begingroup\$ Sorry, better now? What tour? \$\endgroup\$Kostrahb– Kostrahb2016年04月22日 12:20:44 +00:00Commented Apr 22, 2016 at 12:20
-
\$\begingroup\$ Much better. The tour is codereview.stackexchange.com/tour. \$\endgroup\$pacmaninbw– pacmaninbw ♦2016年04月22日 12:29:10 +00:00Commented Apr 22, 2016 at 12:29
1 Answer 1
Once you get under a second for whole process execution it's better to start timing inside the program using a accurate timestamp function. This avoids the C runtime initialization and printing from dominating the measurement.
One suggestion I had was keeping a struct node *freelist
so you can easily grab a new node by doing
struct node *newNode = freelist;
freelist = freelist->next;
And when you free a node you can add it to the free list by doing:
oldNode->next = freelist;
freelist = oldNode;
This avoids having to call calloc
and free
over and over.
If you destroy a non empty list you leak all nodes still in the list. Similarly the rightlist
in list_split
just gets its values overwritten.
-
\$\begingroup\$ The first bug I saw earlier but somehow forgot about it. But list_split is static, so nobody sees it so it should be ok, right? Anyway thanks for nice suggestion, I'll try to implement that. \$\endgroup\$Kostrahb– Kostrahb2016年04月22日 14:08:13 +00:00Commented Apr 22, 2016 at 14:08
-
\$\begingroup\$ There should at least be a assert that the right list is empty or let it return a new right list. \$\endgroup\$ratchet freak– ratchet freak2016年04月22日 14:10:54 +00:00Commented Apr 22, 2016 at 14:10
Explore related questions
See similar questions with these tags.