This time, I have rewritten my radix sort implementation from C++ to C:
integer_sort.h:
#ifndef INTEGER_SORT_H
#define INTEGER_SORT_H

#include <stdint.h>
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

void int32_sort(void* base,
                const size_t num,
                const size_t size,
                const int32_t (*key_handler)(const void*));

#ifdef __cplusplus
}
#endif

#endif /* INTEGER_SORT_H */
integer_sort.c:
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "integer_sort.h"
static const size_t INSERTIONSORT_THRESHOLD = 8;
static const size_t RADIX = 256;
static inline void insertion_sort(void* base,
                                  const size_t num,
                                  const size_t size,
                                  const int32_t (*key_handler)(const void*),
                                  char *const aux)
{
    if (num == 1)
    {
        return;
    }

    char* p = ((char*) base) + size;
    char* p_limit = ((char*) base) + num * size;
    char* p_bottom = (char*) base;

    for (; p != p_limit; p += size)
    {
        memcpy(aux, p, size);
        int32_t key = key_handler(p);
        char* pp = p - size;

        while (pp >= p_bottom && key_handler(pp) > key)
        {
            memcpy(pp + size, pp, size);
            pp -= size;
        }

        memcpy(pp + size, aux, size);
    }
}
static inline size_t get_bucket_index(const int32_t key,
                                      const size_t byte_index)
{
    return (size_t)((key >> (byte_index * CHAR_BIT)) & ((size_t) 0xff));
}
static void int32_sort_impl_low(char* source,
                                char* target,
                                char* element_buffer,
                                const size_t num,
                                const size_t size,
                                const int32_t (*key_handler)(const void*),
                                const size_t byte_index)
{
    if (num < INSERTIONSORT_THRESHOLD)
    {
        insertion_sort(source,
                       num,
                       size,
                       key_handler,
                       element_buffer);

        /* With an even byte_index the sorted run sits in the scratch buffer,
           so copy it across so that it ends up in the caller's array. */
        if ((byte_index & 1) == 0)
        {
            memcpy(target, source, num * size);
        }

        return;
    }

    size_t bucketSizeMap[RADIX];
    size_t startIndexMap[RADIX];
    size_t processedMap[RADIX];

    memset(bucketSizeMap, 0, sizeof(size_t) * RADIX);
    memset(processedMap, 0, sizeof(size_t) * RADIX);

    for (char* it = source; it != source + num * size; it += size)
    {
        const int32_t current_key = key_handler(it);
        bucketSizeMap[get_bucket_index(current_key, byte_index)]++;
    }

    startIndexMap[0] = 0;

    for (size_t i = 1; i < RADIX; ++i)
    {
        startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
    }

    for (char* it = source; it != source + num * size; it += size)
    {
        const int32_t key = key_handler(it);
        const size_t bucket_index = get_bucket_index(key, byte_index);
        memcpy(target + (startIndexMap[bucket_index]
                         + processedMap[bucket_index]++) * size, it, size);
    }

    if (byte_index)
    {
        for (size_t i = 0; i < RADIX; ++i)
        {
            if (bucketSizeMap[i])
            {
                int32_sort_impl_low(target + startIndexMap[i] * size,
                                    source + startIndexMap[i] * size,
                                    element_buffer,
                                    bucketSizeMap[i],
                                    size,
                                    key_handler,
                                    byte_index - 1);
            }
        }
    }
}
static void int32_sort_impl(char* source,
                            char* target,
                            char* element_buffer,
                            const size_t num,
                            const size_t size,
                            const int32_t (*key_handler)(const void*))
{
    if (num < INSERTIONSORT_THRESHOLD)
    {
        insertion_sort(source, num, size, key_handler, element_buffer);
        return;
    }

    size_t bucketSizeMap[RADIX];
    size_t startIndexMap[RADIX];
    size_t processedMap[RADIX];

    memset(bucketSizeMap, 0, sizeof(size_t) * RADIX);
    memset(processedMap, 0, sizeof(size_t) * RADIX);

    for (char* it = source; it != source + num * size; it += size)
    {
        const int32_t current_key = key_handler(it);
        bucketSizeMap[get_bucket_index(current_key, 3)]++;
    }

    /* The most significant byte carries the sign, so buckets 128..255
       (negative keys) are laid out before buckets 0..127. */
    startIndexMap[RADIX >> 1] = 0;

    for (size_t i = (RADIX >> 1) + 1; i < RADIX; ++i)
    {
        startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
    }

    startIndexMap[0] = startIndexMap[RADIX - 1] + bucketSizeMap[RADIX - 1];

    for (size_t i = 1; i < RADIX >> 1; ++i)
    {
        startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
    }

    for (char* it = source; it != source + num * size; it += size)
    {
        const int32_t current_key = key_handler(it);
        const size_t bucket_index = get_bucket_index(current_key, 3);
        memcpy(target + (startIndexMap[bucket_index]
                         + processedMap[bucket_index]++) * size, it, size);
    }

    for (size_t i = 0; i < RADIX; ++i)
    {
        if (bucketSizeMap[i])
        {
            int32_sort_impl_low(target + startIndexMap[i] * size,
                                source + startIndexMap[i] * size,
                                element_buffer,
                                bucketSizeMap[i],
                                size,
                                key_handler,
                                2);
        }
    }
}
void int32_sort(void* base,
                const size_t num,
                const size_t size,
                const int32_t (*key_handler)(const void*))
{
    if (num < 2 || !base || !key_handler)
    {
        return;
    }

    char *const aux = malloc(num * size);
    memcpy(aux, base, num * size);
    char *const element_buffer = malloc(size);
    int32_sort_impl(base, aux, element_buffer, num, size, key_handler);
    free(element_buffer);
    free(aux);
}
main.c:
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include "integer_sort.h"
static int int_cmp(const void* a, const void* b)
{
    return (*(int*) a) - (*(int*) b);
}

static const int32_t key_int32_handler(const void* p)
{
    return *(int32_t*) p;
}

static int* get_random_int_array(const size_t length)
{
    srand((unsigned int) time(NULL));
    int* array = malloc(sizeof(int) * length);

    for (size_t i = 0; i < length; ++i)
    {
        array[i] = rand();
    }

    return array;
}

static int* copy_int_array(const int* array, const size_t length)
{
    int* copy = malloc(sizeof(int) * length);

    for (size_t i = 0; i < length; ++i)
    {
        copy[i] = array[i];
    }

    return copy;
}

static bool int_arrays_equal(int* array1, int* array2, size_t length)
{
    for (size_t i = 0; i < length; ++i)
    {
        if (array1[i] != array2[i])
        {
            return false;
        }
    }

    return true;
}

static size_t milliseconds()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return 1000 * tv.tv_sec + tv.tv_usec / 1000;
}

void profile(int* array, int length)
{
    int* array2 = copy_int_array(array, length);

    size_t start = milliseconds();
    qsort(array, length, sizeof(int), int_cmp);
    size_t end = milliseconds();
    printf("qsort() in %zu milliseconds.\n", end - start);

    start = milliseconds();
    int32_sort(array2, length, sizeof(int), key_int32_handler);
    end = milliseconds();
    printf("int32_sort() in %zu milliseconds.\n", end - start);

    printf("Algorithms agree: %d\n", int_arrays_equal(array, array2, length));
    free(array2);
}

int main(int argc, const char * argv[]) {
    const size_t length = 100000000;
    int* array = get_random_int_array(length);
    profile(array, length);
    free(array);
}
Benchmark figures:
qsort() in 26707 milliseconds.
int32_sort() in 9378 milliseconds.
Algorithms agree: 1
Critique request:
As always, any critique is much appreciated.
1 Answer
Code duplication

int32_sort_impl() and int32_sort_impl_low() are identical except for the part where you create the start bucket. You should merge those two functions into one and just check byte_index to see whether you should create the start bucket for the high byte or for the low bytes.
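For example, the merged function could look roughly like this (an untested sketch; the name int32_sort_impl_merged is arbitrary, and it reuses RADIX, INSERTIONSORT_THRESHOLD, insertion_sort() and get_bucket_index() from your integer_sort.c):

static void int32_sort_impl_merged(char* source,
                                   char* target,
                                   char* element_buffer,
                                   const size_t num,
                                   const size_t size,
                                   const int32_t (*key_handler)(const void*),
                                   const size_t byte_index)
{
    if (num < INSERTIONSORT_THRESHOLD)
    {
        insertion_sort(source, num, size, key_handler, element_buffer);

        /* With an even byte_index the run sits in the scratch buffer and
           must be copied across, exactly as in int32_sort_impl_low(). */
        if ((byte_index & 1) == 0)
        {
            memcpy(target, source, num * size);
        }

        return;
    }

    size_t bucketSizeMap[RADIX];
    size_t startIndexMap[RADIX];
    size_t processedMap[RADIX];

    memset(bucketSizeMap, 0, sizeof(size_t) * RADIX);
    memset(processedMap, 0, sizeof(size_t) * RADIX);

    /* Counting pass. */
    for (char* it = source; it != source + num * size; it += size)
    {
        bucketSizeMap[get_bucket_index(key_handler(it), byte_index)]++;
    }

    /* Offset table: the only part that depends on which byte we are on. */
    if (byte_index == 3)
    {
        /* Sign byte: buckets 128..255 hold the negative keys and go first. */
        startIndexMap[RADIX >> 1] = 0;

        for (size_t i = (RADIX >> 1) + 1; i < RADIX; ++i)
        {
            startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
        }

        startIndexMap[0] = startIndexMap[RADIX - 1] + bucketSizeMap[RADIX - 1];

        for (size_t i = 1; i < RADIX >> 1; ++i)
        {
            startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
        }
    }
    else
    {
        /* Lower bytes are plain unsigned: ordinary exclusive prefix sum. */
        startIndexMap[0] = 0;

        for (size_t i = 1; i < RADIX; ++i)
        {
            startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
        }
    }

    /* Distribution pass. */
    for (char* it = source; it != source + num * size; it += size)
    {
        const size_t bucket_index = get_bucket_index(key_handler(it), byte_index);
        memcpy(target + (startIndexMap[bucket_index]
                         + processedMap[bucket_index]++) * size, it, size);
    }

    if (byte_index)
    {
        for (size_t i = 0; i < RADIX; ++i)
        {
            if (bucketSizeMap[i])
            {
                int32_sort_impl_merged(target + startIndexMap[i] * size,
                                       source + startIndexMap[i] * size,
                                       element_buffer,
                                       bucketSizeMap[i],
                                       size,
                                       key_handler,
                                       byte_index - 1);
            }
        }
    }
}

int32_sort() would then call int32_sort_impl_merged(base, aux, element_buffer, num, size, key_handler, 3), and the separate int32_sort_impl() disappears.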
No need for CHAR_BIT

You use CHAR_BIT in one place, but what you really want is 8, since your radix is 256 and you are doing the sort 8 bits at a time. In fact, if CHAR_BIT were 16, your sort would not work as written.
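The change is just the shift amount in get_bucket_index(), for example (a minimal sketch of what I mean):

static inline size_t get_bucket_index(const int32_t key,
                                      const size_t byte_index)
{
    /* 8 bits per digit because RADIX is 256, independently of CHAR_BIT. */
    return (size_t)((key >> (byte_index * 8)) & ((size_t) 0xff));
}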
Three maps

You currently use three arrays to hold bucket information during the sort. You actually only need one array. For example, you could get rid of processedMap if you changed:

memcpy(target + (startIndexMap[bucket_index] + processedMap[bucket_index]++) * size, it, size);

to:

memcpy(target + (startIndexMap[bucket_index]++) * size, it, size);

You could get rid of startIndexMap by building it in place within bucketSizeMap instead of creating a separate array for it.
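Put together, one of the low-byte passes might look something like this (an untested sketch; bucketOffsetMap is a name I made up, and the fragment is meant to replace the body of the large-num branch, using the source, target, num, size, key_handler, byte_index and element_buffer that are already in scope). After the distribution loop, each bucketOffsetMap[i] has been advanced to the end of bucket i, which is the start of bucket i + 1, so the bucket boundaries are still known for the recursive calls:

size_t bucketOffsetMap[RADIX];

/* Counting pass: bucketOffsetMap[i] starts out as the size of bucket i. */
memset(bucketOffsetMap, 0, sizeof(size_t) * RADIX);

for (char* it = source; it != source + num * size; it += size)
{
    bucketOffsetMap[get_bucket_index(key_handler(it), byte_index)]++;
}

/* Turn the counts into start offsets in place (exclusive prefix sum). */
size_t running_start = 0;

for (size_t i = 0; i < RADIX; ++i)
{
    const size_t count = bucketOffsetMap[i];
    bucketOffsetMap[i] = running_start;
    running_start += count;
}

/* Distribution pass: every copy advances its bucket's own offset. */
for (char* it = source; it != source + num * size; it += size)
{
    const size_t bucket_index = get_bucket_index(key_handler(it), byte_index);
    memcpy(target + bucketOffsetMap[bucket_index]++ * size, it, size);
}

/* bucketOffsetMap[i] now equals the start of bucket i + 1, and the first
   bucket started at 0, so the boundaries can be recovered on the fly. */
if (byte_index)
{
    size_t bucket_start = 0;

    for (size_t i = 0; i < RADIX; ++i)
    {
        const size_t bucket_end = bucketOffsetMap[i];

        if (bucket_end != bucket_start)
        {
            int32_sort_impl_low(target + bucket_start * size,
                                source + bucket_start * size,
                                element_buffer,
                                bucket_end - bucket_start,
                                size,
                                key_handler,
                                byte_index - 1);
        }

        bucket_start = bucket_end;
    }
}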
Stability
Your implementation performs a stable sort, which is good, but it requires a copy of the array. If you used an in-place algorithm, you would use less memory, but then the sort would become unstable. Another alternative would be to switch to an LSD radix sort, which is stable and faster than MSD.
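For reference, an LSD version with the same interface could look roughly like this (an untested sketch; int32_sort_lsd is just a placeholder name, and it assumes the same headers as integer_sort.c). Each of the four passes does a stable counting sort on one byte, least significant first, ping-ponging between the input array and a scratch buffer; flipping the sign bit of the key makes signed order coincide with unsigned bucket order, so the top byte needs no special case:

void int32_sort_lsd(void* base,
                    const size_t num,
                    const size_t size,
                    const int32_t (*key_handler)(const void*))
{
    if (num < 2 || !base || !key_handler)
    {
        return;
    }

    char* source = base;
    char* target = malloc(num * size);

    if (!target)
    {
        return;
    }

    for (size_t byte_index = 0; byte_index < 4; ++byte_index)
    {
        size_t count[256];
        memset(count, 0, sizeof count);

        /* Counting pass over the current byte of every key. */
        for (char* it = source; it != source + num * size; it += size)
        {
            const uint32_t key = (uint32_t) key_handler(it) ^ 0x80000000u;
            count[(key >> (byte_index * 8)) & 0xffu]++;
        }

        /* Exclusive prefix sum: counts become start offsets. */
        size_t offset = 0;

        for (size_t i = 0; i < 256; ++i)
        {
            const size_t c = count[i];
            count[i] = offset;
            offset += c;
        }

        /* Stable distribution pass into the other buffer. */
        for (char* it = source; it != source + num * size; it += size)
        {
            const uint32_t key = (uint32_t) key_handler(it) ^ 0x80000000u;
            memcpy(target + count[(key >> (byte_index * 8)) & 0xffu]++ * size,
                   it, size);
        }

        /* Swap buffer roles for the next pass. */
        char* tmp = source;
        source = target;
        target = tmp;
    }

    /* After four passes the data is back in base; target points at the
       scratch buffer again, so it can be freed. */
    free(target);
}

This always makes four full passes, but every pass is a straight sequential sweep over the data, which is typically what makes LSD faster in practice.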
You shouldn't be comparing the results of your sort vs quicksort because quicksort is unstable and your results won't match. The only reason the result comparison works in your test program is because your test arrays only contain keys without any extra data.
Comments:

- memcpy(target + (startIndexMap[bucket_index]++) * size, it, size); will break the algorithm, since I need to know where each bucket begins. – coderodde, Sep 1, 2016 at 14:18

- @coderodde After you do all the memcpy's, you will still have all the information you need to know where each bucket begins, because each startIndexMap entry will now be pointing at the next bucket's start index, and you know that the first bucket began at 0. See here for an example implementation. – JS1, Sep 1, 2016 at 17:02

- That's promising, yet I will need some time to rewrite the sort. – coderodde, Sep 1, 2016 at 17:18