This time, I have rewritten my radix sort implementation from C++ to C:
integer_sort.h:
#ifndef INTEGER_SORT_H
#define INTEGER_SORT_H

#include <stdint.h>
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

void int32_sort(void* base,
                const size_t num,
                const size_t size,
                const int32_t (*key_handler)(const void*));

#ifdef __cplusplus
}
#endif

#endif /* INTEGER_SORT_H */
integer_sort.c:
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "integer_sort.h"
static const size_t INSERTIONSORT_THRESHOLD = 8;
static const size_t RADIX = 256;
static inline void insertion_sort(void* base,
                                  const size_t num,
                                  const size_t size,
                                  const int32_t (*key_handler)(const void*),
                                  char *const aux)
{
    if (num == 1)
    {
        return;
    }

    char* p = ((char*) base) + size;
    char* p_limit = ((char*) base) + num * size;
    char* p_bottom = (char*) base;

    for (; p != p_limit; p += size)
    {
        memcpy(aux, p, size);
        int32_t key = key_handler(p);
        char* pp = p - size;

        while (pp >= p_bottom && key_handler(pp) > key)
        {
            memcpy(pp + size, pp, size);
            pp -= size;
        }

        memcpy(pp + size, aux, size);
    }
}
static inline size_t get_bucket_index(const int32_t key,
                                      const size_t byte_index)
{
    return (size_t)((key >> (byte_index * CHAR_BIT)) & ((size_t) 0xff));
}
static void int32_sort_impl_low(char* source,
                                char* target,
                                char* element_buffer,
                                const size_t num,
                                const size_t size,
                                const int32_t (*key_handler)(const void*),
                                const size_t byte_index)
{
    if (num < INSERTIONSORT_THRESHOLD)
    {
        insertion_sort(source,
                       num,
                       size,
                       key_handler,
                       element_buffer);

        /* With an even byte_index the sorted run sits in the scratch buffer,
           so copy it across so that it ends up in the caller's array. */
        if ((byte_index & 1) == 0)
        {
            memcpy(target, source, num * size);
        }

        return;
    }

    size_t bucketSizeMap[RADIX];
    size_t startIndexMap[RADIX];
    size_t processedMap[RADIX];

    memset(bucketSizeMap, 0, sizeof(size_t) * RADIX);
    memset(processedMap, 0, sizeof(size_t) * RADIX);

    for (char* it = source; it != source + num * size; it += size)
    {
        const int32_t current_key = key_handler(it);
        bucketSizeMap[get_bucket_index(current_key, byte_index)]++;
    }

    startIndexMap[0] = 0;

    for (size_t i = 1; i < RADIX; ++i)
    {
        startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
    }

    for (char* it = source; it != source + num * size; it += size)
    {
        const int32_t key = key_handler(it);
        const size_t bucket_index = get_bucket_index(key, byte_index);
        memcpy(target + (startIndexMap[bucket_index]
                         + processedMap[bucket_index]++) * size, it, size);
    }

    if (byte_index)
    {
        for (size_t i = 0; i < RADIX; ++i)
        {
            if (bucketSizeMap[i])
            {
                int32_sort_impl_low(target + startIndexMap[i] * size,
                                    source + startIndexMap[i] * size,
                                    element_buffer,
                                    bucketSizeMap[i],
                                    size,
                                    key_handler,
                                    byte_index - 1);
            }
        }
    }
}
static void int32_sort_impl(char* source,
                            char* target,
                            char* element_buffer,
                            const size_t num,
                            const size_t size,
                            const int32_t (*key_handler)(const void*))
{
    if (num < INSERTIONSORT_THRESHOLD)
    {
        insertion_sort(source, num, size, key_handler, element_buffer);
        return;
    }

    size_t bucketSizeMap[RADIX];
    size_t startIndexMap[RADIX];
    size_t processedMap[RADIX];

    memset(bucketSizeMap, 0, sizeof(size_t) * RADIX);
    memset(processedMap, 0, sizeof(size_t) * RADIX);

    for (char* it = source; it != source + num * size; it += size)
    {
        const int32_t current_key = key_handler(it);
        bucketSizeMap[get_bucket_index(current_key, 3)]++;
    }

    /* The most significant byte carries the sign, so buckets 128..255
       (negative keys) are laid out before buckets 0..127. */
    startIndexMap[RADIX >> 1] = 0;

    for (size_t i = (RADIX >> 1) + 1; i < RADIX; ++i)
    {
        startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
    }

    startIndexMap[0] = startIndexMap[RADIX - 1] + bucketSizeMap[RADIX - 1];

    for (size_t i = 1; i < RADIX >> 1; ++i)
    {
        startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
    }

    for (char* it = source; it != source + num * size; it += size)
    {
        const int32_t current_key = key_handler(it);
        const size_t bucket_index = get_bucket_index(current_key, 3);
        memcpy(target + (startIndexMap[bucket_index]
                         + processedMap[bucket_index]++) * size, it, size);
    }

    for (size_t i = 0; i < RADIX; ++i)
    {
        if (bucketSizeMap[i])
        {
            int32_sort_impl_low(target + startIndexMap[i] * size,
                                source + startIndexMap[i] * size,
                                element_buffer,
                                bucketSizeMap[i],
                                size,
                                key_handler,
                                2);
        }
    }
}
void int32_sort(void* base,
                const size_t num,
                const size_t size,
                const int32_t (*key_handler)(const void*))
{
    if (num < 2 || !base || !key_handler)
    {
        return;
    }

    char *const aux = malloc(num * size);
    memcpy(aux, base, num * size);
    char *const element_buffer = malloc(size);
    int32_sort_impl(base, aux, element_buffer, num, size, key_handler);
    free(element_buffer);
    free(aux);
}
main.c:
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include "integer_sort.h"
static int int_cmp(const void* a, const void* b)
{
    return (*(int*) a) - (*(int*) b);
}

static const int32_t key_int32_handler(const void* p)
{
    return *(int32_t*) p;
}

static int* get_random_int_array(const size_t length)
{
    srand((unsigned int) time(NULL));
    int* array = malloc(sizeof(int) * length);

    for (size_t i = 0; i < length; ++i)
    {
        array[i] = rand();
    }

    return array;
}

static int* copy_int_array(const int* array, const size_t length)
{
    int* copy = malloc(sizeof(int) * length);

    for (size_t i = 0; i < length; ++i)
    {
        copy[i] = array[i];
    }

    return copy;
}

static bool int_arrays_equal(int* array1, int* array2, size_t length)
{
    for (size_t i = 0; i < length; ++i)
    {
        if (array1[i] != array2[i])
        {
            return false;
        }
    }

    return true;
}

static size_t milliseconds()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return 1000 * tv.tv_sec + tv.tv_usec / 1000;
}

void profile(int* array, int length)
{
    int* array2 = copy_int_array(array, length);

    size_t start = milliseconds();
    qsort(array, length, sizeof(int), int_cmp);
    size_t end = milliseconds();
    printf("qsort() in %zu milliseconds.\n", end - start);

    start = milliseconds();
    int32_sort(array2, length, sizeof(int), key_int32_handler);
    end = milliseconds();
    printf("int32_sort() in %zu milliseconds.\n", end - start);

    printf("Algorithms agree: %d\n", int_arrays_equal(array, array2, length));
    free(array2);
}

int main(int argc, const char * argv[]) {
    const size_t length = 100000000;
    int* array = get_random_int_array(length);
    profile(array, length);
    free(array);
}
Benchmark figures:
qsort() in 26707 milliseconds.
int32_sort() in 9378 milliseconds.
Algorithms agree: 1
Critique request:
As always, any critique is much appreciated.
1 Answer
Code duplication

int32_sort_impl() and int32_sort_impl_low() are identical except for the part where you create the start bucket. You should merge those two functions into one and just check byte_index to see whether you should create the start bucket for the high byte or for the low bytes.
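For example, the merged function could look roughly like this (an untested sketch; the name int32_sort_impl_merged is arbitrary, and it reuses RADIX, INSERTIONSORT_THRESHOLD, insertion_sort() and get_bucket_index() from your integer_sort.c):

static void int32_sort_impl_merged(char* source,
                                   char* target,
                                   char* element_buffer,
                                   const size_t num,
                                   const size_t size,
                                   const int32_t (*key_handler)(const void*),
                                   const size_t byte_index)
{
    if (num < INSERTIONSORT_THRESHOLD)
    {
        insertion_sort(source, num, size, key_handler, element_buffer);

        /* With an even byte_index the run sits in the scratch buffer and
           must be copied across, exactly as in int32_sort_impl_low(). */
        if ((byte_index & 1) == 0)
        {
            memcpy(target, source, num * size);
        }

        return;
    }

    size_t bucketSizeMap[RADIX];
    size_t startIndexMap[RADIX];
    size_t processedMap[RADIX];

    memset(bucketSizeMap, 0, sizeof(size_t) * RADIX);
    memset(processedMap, 0, sizeof(size_t) * RADIX);

    /* Counting pass. */
    for (char* it = source; it != source + num * size; it += size)
    {
        bucketSizeMap[get_bucket_index(key_handler(it), byte_index)]++;
    }

    /* Offset table: the only part that depends on which byte we are on. */
    if (byte_index == 3)
    {
        /* Sign byte: buckets 128..255 hold the negative keys and go first. */
        startIndexMap[RADIX >> 1] = 0;

        for (size_t i = (RADIX >> 1) + 1; i < RADIX; ++i)
        {
            startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
        }

        startIndexMap[0] = startIndexMap[RADIX - 1] + bucketSizeMap[RADIX - 1];

        for (size_t i = 1; i < RADIX >> 1; ++i)
        {
            startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
        }
    }
    else
    {
        /* Lower bytes are plain unsigned: ordinary exclusive prefix sum. */
        startIndexMap[0] = 0;

        for (size_t i = 1; i < RADIX; ++i)
        {
            startIndexMap[i] = startIndexMap[i - 1] + bucketSizeMap[i - 1];
        }
    }

    /* Distribution pass. */
    for (char* it = source; it != source + num * size; it += size)
    {
        const size_t bucket_index = get_bucket_index(key_handler(it), byte_index);
        memcpy(target + (startIndexMap[bucket_index]
                         + processedMap[bucket_index]++) * size, it, size);
    }

    if (byte_index)
    {
        for (size_t i = 0; i < RADIX; ++i)
        {
            if (bucketSizeMap[i])
            {
                int32_sort_impl_merged(target + startIndexMap[i] * size,
                                       source + startIndexMap[i] * size,
                                       element_buffer,
                                       bucketSizeMap[i],
                                       size,
                                       key_handler,
                                       byte_index - 1);
            }
        }
    }
}

int32_sort() would then call int32_sort_impl_merged(base, aux, element_buffer, num, size, key_handler, 3), and the separate int32_sort_impl() disappears.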
No need for CHAR_BIT

You use CHAR_BIT in one place, but what you really want is 8, since your radix is 256 and you are doing the sort 8 bits at a time. In fact, if CHAR_BIT were 16, your sort would not work as written.
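The change is just the shift amount in get_bucket_index(), for example (a minimal sketch of what I mean):

static inline size_t get_bucket_index(const int32_t key,
                                      const size_t byte_index)
{
    /* 8 bits per digit because RADIX is 256, independently of CHAR_BIT. */
    return (size_t)((key >> (byte_index * 8)) & ((size_t) 0xff));
}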
Three maps

You currently use three arrays to hold bucket information during the sort. You actually only need one array. For example, you could get rid of processedMap if you changed:

memcpy(target + (startIndexMap[bucket_index] + processedMap[bucket_index]++) * size, it, size);

to:

memcpy(target + (startIndexMap[bucket_index]++) * size, it, size);

You could get rid of startIndexMap by building it in place within bucketSizeMap instead of creating a separate array for it.
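Put together, one of the low-byte passes might look something like this (an untested sketch; bucketOffsetMap is a name I made up, and the fragment is meant to replace the body of the large-num branch, using the source, target, num, size, key_handler, byte_index and element_buffer that are already in scope). After the distribution loop, each bucketOffsetMap[i] has been advanced to the end of bucket i, which is the start of bucket i + 1, so the bucket boundaries are still known for the recursive calls:

size_t bucketOffsetMap[RADIX];

/* Counting pass: bucketOffsetMap[i] starts out as the size of bucket i. */
memset(bucketOffsetMap, 0, sizeof(size_t) * RADIX);

for (char* it = source; it != source + num * size; it += size)
{
    bucketOffsetMap[get_bucket_index(key_handler(it), byte_index)]++;
}

/* Turn the counts into start offsets in place (exclusive prefix sum). */
size_t running_start = 0;

for (size_t i = 0; i < RADIX; ++i)
{
    const size_t count = bucketOffsetMap[i];
    bucketOffsetMap[i] = running_start;
    running_start += count;
}

/* Distribution pass: every copy advances its bucket's own offset. */
for (char* it = source; it != source + num * size; it += size)
{
    const size_t bucket_index = get_bucket_index(key_handler(it), byte_index);
    memcpy(target + bucketOffsetMap[bucket_index]++ * size, it, size);
}

/* bucketOffsetMap[i] now equals the start of bucket i + 1, and the first
   bucket started at 0, so the boundaries can be recovered on the fly. */
if (byte_index)
{
    size_t bucket_start = 0;

    for (size_t i = 0; i < RADIX; ++i)
    {
        const size_t bucket_end = bucketOffsetMap[i];

        if (bucket_end != bucket_start)
        {
            int32_sort_impl_low(target + bucket_start * size,
                                source + bucket_start * size,
                                element_buffer,
                                bucket_end - bucket_start,
                                size,
                                key_handler,
                                byte_index - 1);
        }

        bucket_start = bucket_end;
    }
}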
Stability
Your implementation performs a stable sort, which is good, but it requires a copy of the array. If you used an in-place algorithm, you would use less memory, but then the sort would become unstable. Another alternative would be to switch to an LSD radix sort, which is stable and faster than MSD.
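For reference, an LSD version with the same interface could look roughly like this (an untested sketch; int32_sort_lsd is just a placeholder name, and it assumes the same headers as integer_sort.c). Each of the four passes does a stable counting sort on one byte, least significant first, ping-ponging between the input array and a scratch buffer; flipping the sign bit of the key makes signed order coincide with unsigned bucket order, so the top byte needs no special case:

void int32_sort_lsd(void* base,
                    const size_t num,
                    const size_t size,
                    const int32_t (*key_handler)(const void*))
{
    if (num < 2 || !base || !key_handler)
    {
        return;
    }

    char* source = base;
    char* target = malloc(num * size);

    if (!target)
    {
        return;
    }

    for (size_t byte_index = 0; byte_index < 4; ++byte_index)
    {
        size_t count[256];
        memset(count, 0, sizeof count);

        /* Counting pass over the current byte of every key. */
        for (char* it = source; it != source + num * size; it += size)
        {
            const uint32_t key = (uint32_t) key_handler(it) ^ 0x80000000u;
            count[(key >> (byte_index * 8)) & 0xffu]++;
        }

        /* Exclusive prefix sum: counts become start offsets. */
        size_t offset = 0;

        for (size_t i = 0; i < 256; ++i)
        {
            const size_t c = count[i];
            count[i] = offset;
            offset += c;
        }

        /* Stable distribution pass into the other buffer. */
        for (char* it = source; it != source + num * size; it += size)
        {
            const uint32_t key = (uint32_t) key_handler(it) ^ 0x80000000u;
            memcpy(target + count[(key >> (byte_index * 8)) & 0xffu]++ * size,
                   it, size);
        }

        /* Swap buffer roles for the next pass. */
        char* tmp = source;
        source = target;
        target = tmp;
    }

    /* After four passes the data is back in base; target points at the
       scratch buffer again, so it can be freed. */
    free(target);
}

This always makes four full passes, but every pass is a straight sequential sweep over the data, which is typically what makes LSD faster in practice.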
You shouldn't be comparing the results of your sort vs quicksort because quicksort is unstable and your results won't match. The only reason the result comparison works in your test program is because your test arrays only contain keys without any extra data.
Comments:

- memcpy(target + (startIndexMap[bucket_index]++) * size, it, size); will break the algorithm, since I need to know where each bucket begins. – coderodde, Sep 1, 2016 at 14:18

- @coderodde After you do all the memcpy's, you will still have all the information you need to know where each bucket begins, because each startIndexMap entry will now be pointing at the next bucket's start index, and you know that the first bucket began at 0. See here for an example implementation. – JS1, Sep 1, 2016 at 17:02

- That's promising, yet I will need some time to rewrite the sort. – coderodde, Sep 1, 2016 at 17:18