I've created BRESort - an adaptive sorting engine that dynamically selects
optimal algorithms based on data patterns. It achieves 3.6-4.2x speedup over stdlib qsort while preventing \$O(n^2)\$ catastrophes.
GitHub Repository: https://github.com/LazyCauchPotato/BRESort
Key Components:
bresort.h- Public API and data generatorsbresort.c- Core algorithm implementation- Test suites for validation and performance
Core Innovation:
The engine analyzes data in one pass (range, sortedness, patterns) to choose between counting sort, insertion sort, or hybrid approaches.
/* Core analysis function - the "brain" of BRESort */
static int bres_analyze_strategy(unsigned char arr[], int n) {
if (n <= 16) return 1; // Insertion sort for tiny arrays
unsigned char min_val = 255, max_val = 0;
int out_of_order = 0;
int consecutive_sorted = 0;
int max_consecutive = 0;
// Single-pass analysis: extract data patterns
for (int i = 0; i < n; i++) {
// 1. Measure value range
if (arr[i] < min_val) min_val = arr[i];
if (arr[i] > max_val) max_val = arr[i];
// 2. Measure sortedness and patterns
if (i > 0) {
if (arr[i] >= arr[i-1]) {
consecutive_sorted++;
if (consecutive_sorted > max_consecutive) {
max_consecutive = consecutive_sorted;
}
} else {
out_of_order++;
consecutive_sorted = 0;
}
}
}
unsigned char range = max_val - min_val;
// Decision tree based on extracted patterns
if (range == 0) return 0; // All identical - counting sort
if (range < 16) return 0; // Small range - counting sort
if (range < 64 && n > 100) return 2; // Medium complexity - hybrid
if (out_of_order < 3 || max_consecutive > n * 0.8) return 1; // Mostly sorted - insertion
if (n > 1000 && range > 128) return 2; // Large & complex - hybrid
return 0; // Default: counting sort (safe)
}
/* Main dispatch function */
void BRESort_byte(unsigned char arr[], int n) {
if (n <= 1) return;
// Tiny arrays always use insertion sort
if (n <= 8) {
bres_insertion_sort(arr, n);
return;
}
// Analyze and choose optimal strategy
int strategy = bres_analyze_strategy(arr, n);
switch (strategy) {
case 0: bres_counting_sort(arr, n); break; // Counting sort
case 1: bres_insertion_sort(arr, n); break; // Insertion sort
case 2: bres_hybrid_sort(arr, n); break; // Hybrid approach
default: bres_counting_sort(arr, n); break; // Safe fallback
}
}
/* Optimized counting sort for bytes */
static void bres_counting_sort(unsigned char arr[], int n) {
int count[256] = {0};
// Count occurrences
for (int i = 0; i < n; i++) {
count[arr[i]]++;
}
// Reconstruct sorted array
int index = 0;
for (int value = 0; value < 256; value++) {
int occurrences = count[value];
if (occurrences > 0) {
if (occurrences > 3) {
// Optimize runs with memset
memset(&arr[index], value, occurrences);
index += occurrences;
} else {
// Manual assignment for small counts
for (int j = 0; j < occurrences; j++) {
arr[index++] = value;
}
}
}
}
}
I'd appreciate any feedback on the overall architecture and C implementation.
bresort.c
#include "bresort.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// ============================================================================
// OPTIMIZED CORE ALGORITHMS
// ============================================================================
/**
* Ultra-optimized insertion sort for small arrays
* Uses direct comparisons for maximum speed
*/
static void bres_insertion_sort(unsigned char arr[], int n) {
for (int i = 1; i < n; i++) {
unsigned char key = arr[i];
int j = i - 1;
// Manual unrolling for small arrays
if (n <= 8) {
while (j >= 0 && arr[j] > key) {
arr[j + 1] = arr[j];
j--;
}
} else {
// For slightly larger arrays, use standard approach
while (j >= 0 && arr[j] > key) {
arr[j + 1] = arr[j];
j--;
}
}
arr[j + 1] = key;
}
}
/**
* Optimized counting sort for bytes
* O(n) time complexity, perfect for byte data
*/
static void bres_counting_sort(unsigned char arr[], int n) {
if (n <= 1) return;
// Use stack allocation for small arrays, heap for large ones
int count[256] = {0};
// Single pass: count occurrences
for (int i = 0; i < n; i++) {
count[arr[i]]++;
}
// Reconstruct sorted array in-place
int index = 0;
for (int value = 0; value < 256; value++) {
int occurrences = count[value];
if (occurrences > 0) {
// Use memset for consecutive values (small optimization)
if (occurrences > 3) {
memset(&arr[index], value, occurrences);
index += occurrences;
} else {
// Manual assignment for small counts
for (int j = 0; j < occurrences; j++) {
arr[index++] = value;
}
}
}
}
}
/**
* Hybrid approach for medium-sized arrays with small range
*/
static void bres_hybrid_sort(unsigned char arr[], int n) {
unsigned char min_val = 255, max_val = 0;
// Find range in single pass
for (int i = 0; i < n; i++) {
if (arr[i] < min_val) min_val = arr[i];
if (arr[i] > max_val) max_val = arr[i];
}
unsigned char range = max_val - min_val;
// Choose strategy based on range characteristics
if (range < 32 || range < n / 8) {
bres_counting_sort(arr, n);
} else {
bres_insertion_sort(arr, n);
}
}
// ============================================================================
// STRATEGY SELECTION ENGINE - THE INTELLIGENT CORE
// ============================================================================
/**
* Analyzes byte array to choose optimal sorting strategy
* Returns: 0=counting, 1=insertion, 2=hybrid
*
* Decision Tree:
* - Very small arrays → Insertion sort
* - Small value range → Counting sort (O(n) performance)
* - Mostly sorted data → Insertion sort (excel on nearly-ordered)
* - Complex patterns → Hybrid approach (balanced performance)
* - Default fallback → Counting sort (safe choice)
*/
static int bres_analyze_strategy(unsigned char arr[], int n) {
if (n <= 16) {
return 1; // Insertion sort for very small arrays
}
unsigned char min_val = 255, max_val = 0;
int out_of_order = 0;
int consecutive_sorted = 0;
int max_consecutive = 0;
// Single-pass analysis for multiple metrics
for (int i = 0; i < n; i++) {
// Track min/max
if (arr[i] < min_val) min_val = arr[i];
if (arr[i] > max_val) max_val = arr[i];
// Track sortedness
if (i > 0) {
if (arr[i] >= arr[i-1]) {
consecutive_sorted++;
if (consecutive_sorted > max_consecutive) {
max_consecutive = consecutive_sorted;
}
} else {
out_of_order++;
consecutive_sorted = 0;
}
}
}
unsigned char range = max_val - min_val;
// Strategy decision tree
if (range == 0) {
return 0; // All identical - counting sort instant
}
if (range < 16) {
return 0; // Very small range - counting sort optimal
}
if (range < 64 && n > 100) {
return 2; // Medium range, large array - hybrid
}
if (out_of_order < 3 || max_consecutive > n * 0.8) {
return 1; // Mostly sorted - insertion sort
}
if (n > 1000 && range > 128) {
return 2; // Large array with large range - hybrid
}
return 0; // Default to counting sort
}
// ============================================================================
// MAIN BRESORT ALGORITHM
// ============================================================================
void BRESort_byte(unsigned char arr[], int n) {
if (n <= 1) return;
// For tiny arrays, always use insertion sort
if (n <= 8) {
bres_insertion_sort(arr, n);
return;
}
// Analyze and choose optimal strategy
int strategy = bres_analyze_strategy(arr, n);
switch (strategy) {
case 0: // Counting sort
bres_counting_sort(arr, n);
break;
case 1: // Insertion sort
bres_insertion_sort(arr, n);
break;
case 2: // Hybrid approach
bres_hybrid_sort(arr, n);
break;
default: // Should never happen
bres_counting_sort(arr, n);
break;
}
}
// ============================================================================
// VALIDATION FUNCTION
// ============================================================================
bool bres_is_sorted_byte(unsigned char arr[], int n) {
for (int i = 0; i < n - 1; i++) {
if (arr[i] > arr[i + 1]) {
return false;
}
}
return true;
}
// ============================================================================
// SPECIALIZED BYTE DATA GENERATORS
// ============================================================================
void bres_generate_bytes_random(unsigned char arr[], int n) {
for (int i = 0; i < n; i++) {
arr[i] = rand() % 256;
}
}
void bres_generate_bytes_sorted(unsigned char arr[], int n) {
if (n <= 1) {
if (n == 1) arr[0] = 0;
return;
}
for (int i = 0; i < n; i++) {
arr[i] = (i * 255) / (n - 1);
}
}
void bres_generate_bytes_reverse(unsigned char arr[], int n) {
if (n <= 1) {
if (n == 1) arr[0] = 255;
return;
}
for (int i = 0; i < n; i++) {
arr[i] = 255 - ((i * 255) / (n - 1));
}
}
void bres_generate_bytes_few_unique(unsigned char arr[], int n) {
unsigned char patterns[] = {0x00, 0x0F, 0xF0, 0xFF, 0x55, 0xAA};
for (int i = 0; i < n; i++) {
arr[i] = patterns[rand() % 6];
}
}
void bres_generate_bytes_all_zeros(unsigned char arr[], int n) {
memset(arr, 0, n);
}
void bres_generate_bytes_all_ones(unsigned char arr[], int n) {
memset(arr, 0xFF, n);
}
void bres_generate_bytes_alternating(unsigned char arr[], int n) {
for (int i = 0; i < n; i++) {
arr[i] = (i % 2) ? 0xAA : 0x55;
}
}
void bres_generate_bytes_realistic(unsigned char arr[], int n) {
// Simulates real-world byte data (like image pixels)
for (int i = 0; i < n; i++) {
// Create clusters of similar values (common in real data)
if (rand() % 100 < 70) { // 70% of values in clusters
int base = rand() % 240;
arr[i] = base + (rand() % 16);
} else { // 30% random
arr[i] = rand() % 256;
}
}
}
bresort.h
#ifndef BRESORT_H
#define BRESORT_H
#include <stdbool.h>
/**
* BRESort_byte - Ultimate byte sorter (3-4x faster than qsort)
* @arr: Array of unsigned chars (bytes) to sort
* @n: Number of elements in array
*
* Features:
* - 3.72x faster than qsort on random data
* - Adaptive strategy selection
* - Optimized for byte data (0-255)
* - Minimal memory overhead (256 bytes)
*/
void BRESort_byte(unsigned char arr[], int n);
/**
* bres_is_sorted_byte - Validates byte array sorting
* @arr: Array to check
* @n: Number of elements
* Returns: true if sorted, false otherwise
*/
bool bres_is_sorted_byte(unsigned char arr[], int n);
#endif
4 Answers 4
Without seeing all of the code, based only on what I can see, I'm not a fan of the 0, 1, and 2 being magic numbers to indicate which sort to use. This should almost certainly be an enum.
enum SortStrategy {
COUNTING_SORT,
INSERTION_SORT,
HYBRID_SORT
};
Your switch also could be refactored to avoid repetition by letting the counting sort case fall through to the default case. By using the enum with meaningful names, the comments become extraneous.
switch (strategy) {
case INSERTION_SORT:
bres_insertion_sort(arr, n);
break;
case HYBRID_SORT:
bres_hybrid_sort(arr, n);
break;
case COUNTING_SORT:
default:
bres_counting_sort(arr, n);
break;
}
Other magic numbers to eliminate would be the thresholds for invoking each sorting method.
You have this loop:
// Single-pass analysis: extract data patterns for (int i = 0; i < n; i++) { // 1. Measure value range if (arr[i] < min_val) min_val = arr[i]; if (arr[i] > max_val) max_val = arr[i]; // 2. Measure sortedness and patterns if (i > 0) { if (arr[i] >= arr[i-1]) { consecutive_sorted++; if (consecutive_sorted > max_consecutive) { max_consecutive = consecutive_sorted; } } else { out_of_order++; consecutive_sorted = 0; } } }
You have a condition which is only ever false on the very first iteration. You may wish to unroll this so that this branch is absent in the loop.
min_val = arr[0];
max_val = arr[0];
// Single-pass analysis: extract data patterns
for (int i = 1; i < n; i++) {
// 1. Measure value range
if (arr[i] < min_val) min_val = arr[i];
if (arr[i] > max_val) max_val = arr[i];
// 2. Measure sortedness and patterns
if (arr[i] >= arr[i-1]) {
consecutive_sorted++;
if (consecutive_sorted > max_consecutive) {
max_consecutive = consecutive_sorted;
}
} else {
out_of_order++;
consecutive_sorted = 0;
}
}
-
\$\begingroup\$ Thank you for this incredibly thorough and insightful review! You're absolutely right about: - Using enums for clarity (much more maintainable) - The switch refactoring for safer defaults - Eliminating magic numbers with named constants - The loop unrolling optimization (brilliant performance insight!) The loop optimization in particular is something I wouldn't have thought of - removing that branch prediction overhead is a great catch. Again this is incredibly valuable feedback. \$\endgroup\$LazyCauchPotato– LazyCauchPotato2025年10月11日 00:09:15 +00:00Commented Oct 11 at 0:09
-
\$\begingroup\$ @LazyCauchPotato I think the loop unrolling wouldn't do too much for performance. The CPU can do branch prediction: if an if statement always chooses the same branch, it won't affect performance too much. I would be interested to know how much you actually save. But another great benefit is reducing nesting. Code complexity, i.e. how hard it is to read and understand your code, scales very poorly with the nesting level. Before your greatest nesting level was 4 indents, after the change it is 3. Sounds small, but it is great actually. \$\endgroup\$AccidentalTaylorExpansion– AccidentalTaylorExpansion2025年10月13日 10:03:18 +00:00Commented Oct 13 at 10:03
I think overall readability OK.
There is an ambiguity in the documentation in the header:
Minimal memory overhead (256 bytes) - does this mean maximal overhead is small, or minimal among all possible ordering routines, or overhead used here is never less than 256 bytes?
If the 1st, the value stated is misleadingly understating. It would be too small even if the type chosen for counts was single byte due to call overhead. Where sizeof int == 8, expect more than 2K.
I can gleen that the array is ordered in place.
I don't know why I like it less that there is no stating in non-descending order.
I'd not prefix static names like bres_.
On 2nd take, bres_analyze_strategy() is a strange name.
Its code reads analyse data and decide strategy, indicating it does two things (connected by data, only).
Include don't rearrange in the enumeration of ordering methods.
In bres_insertion_sort()'s "Manual unrolling for small array-if-else", the controlled statements look identical.
While compilers take care of many things, I'm with say what you mean. "That loop" once more:
unsigned char min_val, max_val = min_val = arr[0];
unsigned char current, previous = min_val;
// Single-pass analysis for extrema,
// max. length among ascending runs and total descends
for (int i = 1; i < n; i++, previous = current) {
current = arr[i];
if (current >= previous) {
if (current > max_val)
max_val = current;
consecutive_sorted++;
if (consecutive_sorted > max_consecutive) {
max_consecutive = consecutive_sorted;
}
} else {
if (current < min_val)
min_val = current;
out_of_order++;
consecutive_sorted = 0;
}
}
On 2nd take, the summary information is asymmetric between descending & ascending.
I'd start selecting the ordering method with
if (out_of_order < 1)
return NON_DESCENDING;
Without this, and the decision for range == 0 different from range < 16 nest the check for 0 in that for < 16.
Architecturally, hybrid_sort() is strange in
- being a second place where to decide on the ordering procedure to use
- repeating establishing the value range.
Re. Ultra-optimized insertion sort find how to control the inner loop with a single comparison.
Come to think of it, communicating the extrema to counting_sort() can reduce processing "0 entries". And the array size used - I don't think I'd go as far as trying "min-based" instead of 0-based.
-
\$\begingroup\$ Thank you, your review is pure gold, you're absolutely right about the architectural issues - having decision logic in multiple places is a maintenance nightmare. I'll refactor to separate data analysis from strategy selection. The memory documentation point is particularly embarrassing - thank you for catching that! I'll work on these refactors this week and update the GitHub repo \$\endgroup\$LazyCauchPotato– LazyCauchPotato2025年10月11日 07:04:48 +00:00Commented Oct 11 at 7:04
-
\$\begingroup\$ I have a full version ready that handles 32/64-bit and floating-point data. I'll start a new question for it. All comments were super helpful. I hope I didn't make the same mistakes in the full version. \$\endgroup\$LazyCauchPotato– LazyCauchPotato2025年10月12日 00:26:49 +00:00Commented Oct 12 at 0:26
In addition to the previous answers, here are some other minor suggestions.
Documentation
There are a lot of helpful comments. It would be good to explain what "BRE" stands for, if it is an acronym or what it means to you.
Since you tagged the question genetic-algorithm, it would be good to mention that in the comments as well.
Simpler
In the bres_analyze_strategy function, this code:
if (range == 0) {
return 0; // All identical - counting sort instant
}
is unnecessary and can be deleted. The case where range is 0 is
already covered by the following check:
if (range < 16) {
You can keep the comment:
// If range is 0: All identical - counting sort instant
Readability
When the number of operators starts adding up:
if (range < 32 || range < n / 8) {
I like to add more parentheses:
if ((range < 32) || (range < n / 8)) {
It helps a person group the terms without having to think as much.
-
\$\begingroup\$ Thanks for the valuable inputs, really appreciate! I've removed the redundant range check. Fixed documentation explaining 'BRE' stands for 'Bitwise Relationship Extraction', clarified comments. \$\endgroup\$LazyCauchPotato– LazyCauchPotato2025年10月11日 17:48:27 +00:00Commented Oct 11 at 17:48
-
\$\begingroup\$ @LazyCauchPotato: You're welcome. \$\endgroup\$toolic– toolic2025年10月12日 10:41:27 +00:00Commented Oct 12 at 10:41
-
\$\begingroup\$ We perhaps have different tastes in parens - probably I've had my fill of them when writing Lisp! \$\endgroup\$Toby Speight– Toby Speight2025年10月13日 08:33:58 +00:00Commented Oct 13 at 8:33
Compiler warnings
Code performs about 10 casual signed to/from unsigned conversions. Consider enabling more compiler warnings to identify.
Array sizing
As an alternative for qsort(), consider a signature more like void qsort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *));
For void BRESort_byte(unsigned char arr[], int n);, this would mean using size_t n instead of int n.
size_t is also the Goldilocks type for array sizing and indexing: not too narrow, not too wide.
This is also a fairer comparison against qsort(), which can handle arrays more than INT_MAX.
Since size_t is an unsigned type, code needs review to avoid negatives.
This also reduces the casual type conversions going on with mem...() calls.
Avoid naked magic numbers
Instead of values like 3, 8, 32, ... (with no explanation), use a self-documenting macro or named constant.
Use standard library ones when possible. E.g. rather than 255, UCHAR_MAX when assigning the maximal unsigned char, int count[256] = {0}; --> int count[UCHAR_MAX + 1] = {0};.
If code depends on an 8-bit char, use static_assert(CHAR_BIT == 8, "Must use 8-bit char");. I suspect CHAR_BIT <= 12 (or so) may be supportable.
Back up the claim
To avoid dismissing "3-4x faster than qsort", post deserves a test harness to demo it.
I could see this code using a dedicated sort function rather than qsort()'s function pointer accounts for a good deal of the improvement.
Organization
"SPECIALIZED BYTE DATA GENERATORS" deserve a separate .c file.
Minor: Argument order
(unsigned char arr[], int n) is more common that (int n, unsigned char arr[]) yet I find (int n, unsigned char arr[n]) or (int n, unsigned char arr[static n]) more useful as it allows various analysis inspections.
-
\$\begingroup\$ Really appreciate that you took your time to review the code! I will make these corrections in the full blown version of BRESort - this 'byte' version is a test run to assess the concept and get a reality check! The full version extends this intelligent approach to 32/64-bit integers and floating-point data. \$\endgroup\$LazyCauchPotato– LazyCauchPotato2025年10月11日 17:56:27 +00:00Commented Oct 11 at 17:56
-
\$\begingroup\$ As for the function pointer overhead in qsort, yeah... you're right. Changing my test \$\endgroup\$LazyCauchPotato– LazyCauchPotato2025年10月11日 18:01:28 +00:00Commented Oct 11 at 18:01
-
2\$\begingroup\$ You were absolutely right about the function pointer overhead! My validation show significant gains in performance! \$\endgroup\$LazyCauchPotato– LazyCauchPotato2025年10月11日 18:25:25 +00:00Commented Oct 11 at 18:25
-
1
bres_insertion_sort()'s Manual unrolling for small array-if-else, the controlled statements look identical. \$\endgroup\$BRESort_byte(): Do you plan to come up with routines for different keys/records? \$\endgroup\$stdlibqsort" -- there is no single "stdlibqsort". Your sort may outperform theqsortof your particular C implementation, but that's not the same thing. \$\endgroup\$