My BRESort adaptive sorting engine in C

Question 1

I've created BRESort - an adaptive sorting engine that dynamically selects optimal algorithms based on data patterns. It achieves 3.6-4.2x speedup over stdlib qsort while preventing \$O(n^2)\$ catastrophes.

GitHub Repository: https://github.com/LazyCauchPotato/BRESort

Key Components:

bresort.h - Public API and data generators
bresort.c - Core algorithm implementation
Test suites for validation and performance

Core Innovation:

The engine analyzes data in one pass (range, sortedness, patterns) to choose between counting sort, insertion sort, or hybrid approaches.

/* Core analysis function - the "brain" of BRESort */
 static int bres_analyze_strategy(unsigned char arr[], int n) {
 if (n <= 16) return 1; // Insertion sort for tiny arrays
 
 unsigned char min_val = 255, max_val = 0;
 int out_of_order = 0;
 int consecutive_sorted = 0;
 int max_consecutive = 0;
 
 // Single-pass analysis: extract data patterns
 for (int i = 0; i < n; i++) {
 // 1. Measure value range
 if (arr[i] < min_val) min_val = arr[i];
 if (arr[i] > max_val) max_val = arr[i];
 
 // 2. Measure sortedness and patterns
 if (i > 0) {
 if (arr[i] >= arr[i-1]) {
 consecutive_sorted++;
 if (consecutive_sorted > max_consecutive) {
 max_consecutive = consecutive_sorted;
 }
 } else {
 out_of_order++;
 consecutive_sorted = 0;
 }
 }
 }
 
 unsigned char range = max_val - min_val;
 
 // Decision tree based on extracted patterns
 if (range == 0) return 0; // All identical - counting sort
 if (range < 16) return 0; // Small range - counting sort
 if (range < 64 && n > 100) return 2; // Medium complexity - hybrid
 if (out_of_order < 3 || max_consecutive > n * 0.8) return 1; // Mostly sorted - insertion
 if (n > 1000 && range > 128) return 2; // Large & complex - hybrid
 
 return 0; // Default: counting sort (safe)
 }
 
 /* Main dispatch function */
 void BRESort_byte(unsigned char arr[], int n) {
 if (n <= 1) return;
 
 // Tiny arrays always use insertion sort
 if (n <= 8) {
 bres_insertion_sort(arr, n);
 return;
 }
 
 // Analyze and choose optimal strategy
 int strategy = bres_analyze_strategy(arr, n);
 
 switch (strategy) {
 case 0: bres_counting_sort(arr, n); break; // Counting sort
 case 1: bres_insertion_sort(arr, n); break; // Insertion sort 
 case 2: bres_hybrid_sort(arr, n); break; // Hybrid approach
 default: bres_counting_sort(arr, n); break; // Safe fallback
 }
 }
 
 /* Optimized counting sort for bytes */
 static void bres_counting_sort(unsigned char arr[], int n) {
 int count[256] = {0};
 
 // Count occurrences
 for (int i = 0; i < n; i++) {
 count[arr[i]]++;
 }
 
 // Reconstruct sorted array
 int index = 0;
 for (int value = 0; value < 256; value++) {
 int occurrences = count[value];
 if (occurrences > 0) {
 if (occurrences > 3) {
 // Optimize runs with memset
 memset(&arr[index], value, occurrences);
 index += occurrences;
 } else {
 // Manual assignment for small counts
 for (int j = 0; j < occurrences; j++) {
 arr[index++] = value;
 }
 }
 }
 }
 }

I'd appreciate any feedback on the overall architecture and C implementation.

bresort.c

#include "bresort.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// ============================================================================
// OPTIMIZED CORE ALGORITHMS
// ============================================================================
/**
 * Ultra-optimized insertion sort for small arrays
 * Uses direct comparisons for maximum speed
 */
static void bres_insertion_sort(unsigned char arr[], int n) {
 for (int i = 1; i < n; i++) {
 unsigned char key = arr[i];
 int j = i - 1;
 // Manual unrolling for small arrays
 if (n <= 8) {
 while (j >= 0 && arr[j] > key) {
 arr[j + 1] = arr[j];
 j--;
 }
 } else {
 // For slightly larger arrays, use standard approach
 while (j >= 0 && arr[j] > key) {
 arr[j + 1] = arr[j];
 j--;
 }
 }
 arr[j + 1] = key;
 }
}
/**
 * Optimized counting sort for bytes
 * O(n) time complexity, perfect for byte data
 */
static void bres_counting_sort(unsigned char arr[], int n) {
 if (n <= 1) return;
 // Use stack allocation for small arrays, heap for large ones
 int count[256] = {0};
 // Single pass: count occurrences
 for (int i = 0; i < n; i++) {
 count[arr[i]]++;
 }
 // Reconstruct sorted array in-place
 int index = 0;
 for (int value = 0; value < 256; value++) {
 int occurrences = count[value];
 if (occurrences > 0) {
 // Use memset for consecutive values (small optimization)
 if (occurrences > 3) {
 memset(&arr[index], value, occurrences);
 index += occurrences;
 } else {
 // Manual assignment for small counts
 for (int j = 0; j < occurrences; j++) {
 arr[index++] = value;
 }
 }
 }
 }
}
/**
 * Hybrid approach for medium-sized arrays with small range
 */
static void bres_hybrid_sort(unsigned char arr[], int n) {
 unsigned char min_val = 255, max_val = 0;
 // Find range in single pass
 for (int i = 0; i < n; i++) {
 if (arr[i] < min_val) min_val = arr[i];
 if (arr[i] > max_val) max_val = arr[i];
 }
 unsigned char range = max_val - min_val;
 // Choose strategy based on range characteristics
 if (range < 32 || range < n / 8) {
 bres_counting_sort(arr, n);
 } else {
 bres_insertion_sort(arr, n);
 }
}
// ============================================================================
// STRATEGY SELECTION ENGINE - THE INTELLIGENT CORE
// ============================================================================
/**
 * Analyzes byte array to choose optimal sorting strategy
 * Returns: 0=counting, 1=insertion, 2=hybrid
 *
 * Decision Tree:
 * - Very small arrays → Insertion sort
 * - Small value range → Counting sort (O(n) performance)
 * - Mostly sorted data → Insertion sort (excel on nearly-ordered)
 * - Complex patterns → Hybrid approach (balanced performance)
 * - Default fallback → Counting sort (safe choice)
 */
static int bres_analyze_strategy(unsigned char arr[], int n) {
 if (n <= 16) {
 return 1; // Insertion sort for very small arrays
 }
 unsigned char min_val = 255, max_val = 0;
 int out_of_order = 0;
 int consecutive_sorted = 0;
 int max_consecutive = 0;
 // Single-pass analysis for multiple metrics
 for (int i = 0; i < n; i++) {
 // Track min/max
 if (arr[i] < min_val) min_val = arr[i];
 if (arr[i] > max_val) max_val = arr[i];
 // Track sortedness
 if (i > 0) {
 if (arr[i] >= arr[i-1]) {
 consecutive_sorted++;
 if (consecutive_sorted > max_consecutive) {
 max_consecutive = consecutive_sorted;
 }
 } else {
 out_of_order++;
 consecutive_sorted = 0;
 }
 }
 }
 unsigned char range = max_val - min_val;
 // Strategy decision tree
 if (range == 0) {
 return 0; // All identical - counting sort instant
 }
 if (range < 16) {
 return 0; // Very small range - counting sort optimal
 }
 if (range < 64 && n > 100) {
 return 2; // Medium range, large array - hybrid
 }
 if (out_of_order < 3 || max_consecutive > n * 0.8) {
 return 1; // Mostly sorted - insertion sort
 }
 if (n > 1000 && range > 128) {
 return 2; // Large array with large range - hybrid
 }
 return 0; // Default to counting sort
}
// ============================================================================
// MAIN BRESORT ALGORITHM
// ============================================================================
void BRESort_byte(unsigned char arr[], int n) {
 if (n <= 1) return;
 // For tiny arrays, always use insertion sort
 if (n <= 8) {
 bres_insertion_sort(arr, n);
 return;
 }
 // Analyze and choose optimal strategy
 int strategy = bres_analyze_strategy(arr, n);
 switch (strategy) {
 case 0: // Counting sort
 bres_counting_sort(arr, n);
 break;
 case 1: // Insertion sort
 bres_insertion_sort(arr, n);
 break;
 case 2: // Hybrid approach
 bres_hybrid_sort(arr, n);
 break;
 default: // Should never happen
 bres_counting_sort(arr, n);
 break;
 }
}
// ============================================================================
// VALIDATION FUNCTION
// ============================================================================
bool bres_is_sorted_byte(unsigned char arr[], int n) {
 for (int i = 0; i < n - 1; i++) {
 if (arr[i] > arr[i + 1]) {
 return false;
 }
 }
 return true;
}
// ============================================================================
// SPECIALIZED BYTE DATA GENERATORS
// ============================================================================
void bres_generate_bytes_random(unsigned char arr[], int n) {
 for (int i = 0; i < n; i++) {
 arr[i] = rand() % 256;
 }
}
void bres_generate_bytes_sorted(unsigned char arr[], int n) {
 if (n <= 1) {
 if (n == 1) arr[0] = 0;
 return;
 }
 for (int i = 0; i < n; i++) {
 arr[i] = (i * 255) / (n - 1);
 }
}
void bres_generate_bytes_reverse(unsigned char arr[], int n) {
 if (n <= 1) {
 if (n == 1) arr[0] = 255;
 return;
 }
 for (int i = 0; i < n; i++) {
 arr[i] = 255 - ((i * 255) / (n - 1));
 }
}
void bres_generate_bytes_few_unique(unsigned char arr[], int n) {
 unsigned char patterns[] = {0x00, 0x0F, 0xF0, 0xFF, 0x55, 0xAA};
 for (int i = 0; i < n; i++) {
 arr[i] = patterns[rand() % 6];
 }
}
void bres_generate_bytes_all_zeros(unsigned char arr[], int n) {
 memset(arr, 0, n);
}
void bres_generate_bytes_all_ones(unsigned char arr[], int n) {
 memset(arr, 0xFF, n);
}
void bres_generate_bytes_alternating(unsigned char arr[], int n) {
 for (int i = 0; i < n; i++) {
 arr[i] = (i % 2) ? 0xAA : 0x55;
 }
}
void bres_generate_bytes_realistic(unsigned char arr[], int n) {
 // Simulates real-world byte data (like image pixels)
 for (int i = 0; i < n; i++) {
 // Create clusters of similar values (common in real data)
 if (rand() % 100 < 70) { // 70% of values in clusters
 int base = rand() % 240;
 arr[i] = base + (rand() % 16);
 } else { // 30% random
 arr[i] = rand() % 256;
 }
 }
}

bresort.h

#ifndef BRESORT_H
#define BRESORT_H
#include <stdbool.h>
/**
 * BRESort_byte - Ultimate byte sorter (3-4x faster than qsort)
 * @arr: Array of unsigned chars (bytes) to sort
 * @n: Number of elements in array
 *
 * Features:
 * - 3.72x faster than qsort on random data
 * - Adaptive strategy selection
 * - Optimized for byte data (0-255)
 * - Minimal memory overhead (256 bytes)
 */
void BRESort_byte(unsigned char arr[], int n);
/**
 * bres_is_sorted_byte - Validates byte array sorting
 * @arr: Array to check
 * @n: Number of elements
 * Returns: true if sorted, false otherwise
 */
bool bres_is_sorted_byte(unsigned char arr[], int n);
#endif

Question 2

In bres_insertion_sort()'s Manual unrolling for small array-if-else, the controlled statements look identical.

Question 3

Pondering the name BRESort_byte(): Do you plan to come up with routines for different keys/records?

Question 4

Oh...you're absolutely right about the insertion sort bug, I accidentally left identical code in both branches. The manual unrolling should actually optimize for tiny arrays. Regarding BRESort_byte() naming: yes, I chose bytes as the starting point because counting sort is particularly effective there, but the adaptive selection concept could definitely extend to other types. I have already tested on float data with promissing results, it has slightly different analysis heuristics, but the core 'analyze then choose' is the same. Thanks for superb review...have a lot of fixing to do...

Question 5

"It achieves 3.6-4.2x speedup over stdlib qsort" -- there is no single "stdlib qsort". Your sort may outperform the qsort of your particular C implementation, but that's not the same thing.

Question 6

Frame challenge: how does it compare to TimSort?

Question 7

Without seeing all of the code, based only on what I can see, I'm not a fan of the 0, 1, and 2 being magic numbers to indicate which sort to use. This should almost certainly be an enum.

enum SortStrategy {
 COUNTING_SORT,
 INSERTION_SORT,
 HYBRID_SORT
};

Your switch also could be refactored to avoid repetition by letting the counting sort case fall through to the default case. By using the enum with meaningful names, the comments become extraneous.

 switch (strategy) { 
 case INSERTION_SORT: 
 bres_insertion_sort(arr, n); 
 break; 
 case HYBRID_SORT: 
 bres_hybrid_sort(arr, n); 
 break; 
 
 case COUNTING_SORT:
 default: 
 bres_counting_sort(arr, n); 
 break;
 }

Other magic numbers to eliminate would be the thresholds for invoking each sorting method.

You have this loop:

 // Single-pass analysis: extract data patterns
 for (int i = 0; i < n; i++) {
 // 1. Measure value range
 if (arr[i] < min_val) min_val = arr[i];
 if (arr[i] > max_val) max_val = arr[i];
 // 2. Measure sortedness and patterns
 if (i > 0) {
 if (arr[i] >= arr[i-1]) {
 consecutive_sorted++;
 if (consecutive_sorted > max_consecutive) {
 max_consecutive = consecutive_sorted;
 }
 } else {
 out_of_order++;
 consecutive_sorted = 0;
 }
 }
 }

You have a condition which is only ever false on the very first iteration. You may wish to unroll this so that this branch is absent in the loop.

 min_val = arr[0];
 max_val = arr[0];
 // Single-pass analysis: extract data patterns
 for (int i = 1; i < n; i++) {
 // 1. Measure value range
 if (arr[i] < min_val) min_val = arr[i];
 if (arr[i] > max_val) max_val = arr[i];
 // 2. Measure sortedness and patterns
 if (arr[i] >= arr[i-1]) {
 consecutive_sorted++;
 if (consecutive_sorted > max_consecutive) {
 max_consecutive = consecutive_sorted;
 }
 } else {
 out_of_order++;
 consecutive_sorted = 0;
 }
 }

Question 8

Thank you for this incredibly thorough and insightful review! You're absolutely right about: - Using enums for clarity (much more maintainable) - The switch refactoring for safer defaults - Eliminating magic numbers with named constants - The loop unrolling optimization (brilliant performance insight!) The loop optimization in particular is something I wouldn't have thought of - removing that branch prediction overhead is a great catch. Again this is incredibly valuable feedback.

Question 9

@LazyCauchPotato I think the loop unrolling wouldn't do too much for performance. The CPU can do branch prediction: if an if statement always chooses the same branch, it won't affect performance too much. I would be interested to know how much you actually save. But another great benefit is reducing nesting. Code complexity, i.e. how hard it is to read and understand your code, scales very poorly with the nesting level. Before your greatest nesting level was 4 indents, after the change it is 3. Sounds small, but it is great actually.

Question 10

I think overall readability OK.

There is an ambiguity in the documentation in the header: Minimal memory overhead (256 bytes) - does this mean maximal overhead is small, or minimal among all possible ordering routines, or overhead used here is never less than 256 bytes? If the 1st, the value stated is misleadingly understating. It would be too small even if the type chosen for counts was single byte due to call overhead. Where sizeof int == 8, expect more than 2K.

I can gleen that the array is ordered in place.
I don't know why I like it less that there is no stating in non-descending order.

I'd not prefix static names like bres_.
On 2nd take, bres_analyze_strategy() is a strange name.
Its code reads analyse data and decide strategy, indicating it does two things (connected by data, only).

Include don't rearrange in the enumeration of ordering methods.

In bres_insertion_sort()'s "Manual unrolling for small array-if-else", the controlled statements look identical.

While compilers take care of many things, I'm with say what you mean. "That loop" once more:

 unsigned char min_val, max_val = min_val = arr[0];
 unsigned char current, previous = min_val;
 // Single-pass analysis for extrema,
 // max. length among ascending runs and total descends
 for (int i = 1; i < n; i++, previous = current) {
 current = arr[i];
 if (current >= previous) {
 if (current > max_val)
 max_val = current;
 consecutive_sorted++;
 if (consecutive_sorted > max_consecutive) {
 max_consecutive = consecutive_sorted;
 }
 } else {
 if (current < min_val)
 min_val = current;
 out_of_order++;
 consecutive_sorted = 0;
 }
 }

On 2nd take, the summary information is asymmetric between descending & ascending.

I'd start selecting the ordering method with

 if (out_of_order < 1)
 return NON_DESCENDING;

Without this, and the decision for range == 0 different from range < 16 nest the check for 0 in that for < 16.

Architecturally, hybrid_sort() is strange in

being a second place where to decide on the ordering procedure to use
repeating establishing the value range.

Re. Ultra-optimized insertion sort find how to control the inner loop with a single comparison.

Come to think of it, communicating the extrema to counting_sort() can reduce processing "0 entries". And the array size used - I don't think I'd go as far as trying "min-based" instead of 0-based.

Question 11

Thank you, your review is pure gold, you're absolutely right about the architectural issues - having decision logic in multiple places is a maintenance nightmare. I'll refactor to separate data analysis from strategy selection. The memory documentation point is particularly embarrassing - thank you for catching that! I'll work on these refactors this week and update the GitHub repo

Question 12

I have a full version ready that handles 32/64-bit and floating-point data. I'll start a new question for it. All comments were super helpful. I hope I didn't make the same mistakes in the full version.

Question 13

In addition to the previous answers, here are some other minor suggestions.

Documentation

There are a lot of helpful comments. It would be good to explain what "BRE" stands for, if it is an acronym or what it means to you.

Since you tagged the question genetic-algorithm, it would be good to mention that in the comments as well.

Simpler

In the bres_analyze_strategy function, this code:

if (range == 0) {
 return 0; // All identical - counting sort instant
}

is unnecessary and can be deleted. The case where range is 0 is already covered by the following check:

if (range < 16) {

You can keep the comment:

// If range is 0: All identical - counting sort instant

Readability

When the number of operators starts adding up:

if (range < 32 || range < n / 8) {

I like to add more parentheses:

if ((range < 32) || (range < n / 8)) {

It helps a person group the terms without having to think as much.

Question 14

Thanks for the valuable inputs, really appreciate! I've removed the redundant range check. Fixed documentation explaining 'BRE' stands for 'Bitwise Relationship Extraction', clarified comments.

Question 15

@LazyCauchPotato: You're welcome.

Question 16

We perhaps have different tastes in parens - probably I've had my fill of them when writing Lisp!

Question 17

Compiler warnings

Code performs about 10 casual signed to/from unsigned conversions. Consider enabling more compiler warnings to identify.

Array sizing

As an alternative for qsort(), consider a signature more like void qsort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *));

For void BRESort_byte(unsigned char arr[], int n);, this would mean using size_t n instead of int n.

size_t is also the Goldilocks type for array sizing and indexing: not too narrow, not too wide.

This is also a fairer comparison against qsort(), which can handle arrays more than INT_MAX.

Since size_t is an unsigned type, code needs review to avoid negatives.

This also reduces the casual type conversions going on with mem...() calls.

Avoid naked magic numbers

Instead of values like 3, 8, 32, ... (with no explanation), use a self-documenting macro or named constant.

Use standard library ones when possible. E.g. rather than 255, UCHAR_MAX when assigning the maximal unsigned char, int count[256] = {0}; --> int count[UCHAR_MAX + 1] = {0};.

If code depends on an 8-bit char, use static_assert(CHAR_BIT == 8, "Must use 8-bit char");. I suspect CHAR_BIT <= 12 (or so) may be supportable.

Back up the claim

To avoid dismissing "3-4x faster than qsort", post deserves a test harness to demo it.

I could see this code using a dedicated sort function rather than qsort()'s function pointer accounts for a good deal of the improvement.

Organization

"SPECIALIZED BYTE DATA GENERATORS" deserve a separate .c file.

Minor: Argument order

(unsigned char arr[], int n) is more common that (int n, unsigned char arr[]) yet I find (int n, unsigned char arr[n]) or (int n, unsigned char arr[static n]) more useful as it allows various analysis inspections.

Question 18

Really appreciate that you took your time to review the code! I will make these corrections in the full blown version of BRESort - this 'byte' version is a test run to assess the concept and get a reality check! The full version extends this intelligent approach to 32/64-bit integers and floating-point data.

Question 19

As for the function pointer overhead in qsort, yeah... you're right. Changing my test

Question 20

You were absolutely right about the function pointer overhead! My validation show significant gains in performance!

Question 21

@LazyCauchPotato There are many was to sort FP given NaN. At least the sort criteria should explain how it handles NaNs.

Chris 4,6941 gold badge7 silver badges37 bronze badges · Answer 1 · 2025-10-10 23:42:02Z

Without seeing all of the code, based only on what I can see, I'm not a fan of the 0, 1, and 2 being magic numbers to indicate which sort to use. This should almost certainly be an enum.

enum SortStrategy {
 COUNTING_SORT,
 INSERTION_SORT,
 HYBRID_SORT
};

Your switch also could be refactored to avoid repetition by letting the counting sort case fall through to the default case. By using the enum with meaningful names, the comments become extraneous.

 switch (strategy) { 
 case INSERTION_SORT: 
 bres_insertion_sort(arr, n); 
 break; 
 case HYBRID_SORT: 
 bres_hybrid_sort(arr, n); 
 break; 
 
 case COUNTING_SORT:
 default: 
 bres_counting_sort(arr, n); 
 break;
 }

Other magic numbers to eliminate would be the thresholds for invoking each sorting method.

You have this loop:

 // Single-pass analysis: extract data patterns
 for (int i = 0; i < n; i++) {
 // 1. Measure value range
 if (arr[i] < min_val) min_val = arr[i];
 if (arr[i] > max_val) max_val = arr[i];
 // 2. Measure sortedness and patterns
 if (i > 0) {
 if (arr[i] >= arr[i-1]) {
 consecutive_sorted++;
 if (consecutive_sorted > max_consecutive) {
 max_consecutive = consecutive_sorted;
 }
 } else {
 out_of_order++;
 consecutive_sorted = 0;
 }
 }
 }

You have a condition which is only ever false on the very first iteration. You may wish to unroll this so that this branch is absent in the loop.

 min_val = arr[0];
 max_val = arr[0];
 // Single-pass analysis: extract data patterns
 for (int i = 1; i < n; i++) {
 // 1. Measure value range
 if (arr[i] < min_val) min_val = arr[i];
 if (arr[i] > max_val) max_val = arr[i];
 // 2. Measure sortedness and patterns
 if (arr[i] >= arr[i-1]) {
 consecutive_sorted++;
 if (consecutive_sorted > max_consecutive) {
 max_consecutive = consecutive_sorted;
 }
 } else {
 out_of_order++;
 consecutive_sorted = 0;
 }
 }

Thank you for this incredibly thorough and insightful review! You're absolutely right about: - Using enums for clarity (much more maintainable) - The switch refactoring for safer defaults - Eliminating magic numbers with named constants - The loop unrolling optimization (brilliant performance insight!) The loop optimization in particular is something I wouldn't have thought of - removing that branch prediction overhead is a great catch. Again this is incredibly valuable feedback.
@LazyCauchPotato I think the loop unrolling wouldn't do too much for performance. The CPU can do branch prediction: if an if statement always chooses the same branch, it won't affect performance too much. I would be interested to know how much you actually save. But another great benefit is reducing nesting. Code complexity, i.e. how hard it is to read and understand your code, scales very poorly with the nesting level. Before your greatest nesting level was 4 indents, after the change it is 3. Sounds small, but it is great actually.

greybeard 7,7493 gold badges21 silver badges56 bronze badges · Answer 2 · 2025-10-11 06:50:53Z

I think overall readability OK.

There is an ambiguity in the documentation in the header: Minimal memory overhead (256 bytes) - does this mean maximal overhead is small, or minimal among all possible ordering routines, or overhead used here is never less than 256 bytes? If the 1st, the value stated is misleadingly understating. It would be too small even if the type chosen for counts was single byte due to call overhead. Where sizeof int == 8, expect more than 2K.

I can gleen that the array is ordered in place.
I don't know why I like it less that there is no stating in non-descending order.

I'd not prefix static names like bres_.
On 2nd take, bres_analyze_strategy() is a strange name.
Its code reads analyse data and decide strategy, indicating it does two things (connected by data, only).

Include don't rearrange in the enumeration of ordering methods.

In bres_insertion_sort()'s "Manual unrolling for small array-if-else", the controlled statements look identical.

While compilers take care of many things, I'm with say what you mean. "That loop" once more:

 unsigned char min_val, max_val = min_val = arr[0];
 unsigned char current, previous = min_val;
 // Single-pass analysis for extrema,
 // max. length among ascending runs and total descends
 for (int i = 1; i < n; i++, previous = current) {
 current = arr[i];
 if (current >= previous) {
 if (current > max_val)
 max_val = current;
 consecutive_sorted++;
 if (consecutive_sorted > max_consecutive) {
 max_consecutive = consecutive_sorted;
 }
 } else {
 if (current < min_val)
 min_val = current;
 out_of_order++;
 consecutive_sorted = 0;
 }
 }

On 2nd take, the summary information is asymmetric between descending & ascending.

I'd start selecting the ordering method with

 if (out_of_order < 1)
 return NON_DESCENDING;

Without this, and the decision for range == 0 different from range < 16 nest the check for 0 in that for < 16.

Architecturally, hybrid_sort() is strange in

being a second place where to decide on the ordering procedure to use
repeating establishing the value range.

Re. Ultra-optimized insertion sort find how to control the inner loop with a single comparison.

Come to think of it, communicating the extrema to counting_sort() can reduce processing "0 entries". And the array size used - I don't think I'd go as far as trying "min-based" instead of 0-based.

Thank you, your review is pure gold, you're absolutely right about the architectural issues - having decision logic in multiple places is a maintenance nightmare. I'll refactor to separate data analysis from strategy selection. The memory documentation point is particularly embarrassing - thank you for catching that! I'll work on these refactors this week and update the GitHub repo
I have a full version ready that handles 32/64-bit and floating-point data. I'll start a new question for it. All comments were super helpful. I hope I didn't make the same mistakes in the full version.

toolic 15.9k6 gold badges29 silver badges217 bronze badges · Answer 3 · 2025-10-11 12:20:48Z

In addition to the previous answers, here are some other minor suggestions.

Documentation

There are a lot of helpful comments. It would be good to explain what "BRE" stands for, if it is an acronym or what it means to you.

Since you tagged the question genetic-algorithm, it would be good to mention that in the comments as well.

Simpler

In the bres_analyze_strategy function, this code:

if (range == 0) {
 return 0; // All identical - counting sort instant
}

is unnecessary and can be deleted. The case where range is 0 is already covered by the following check:

if (range < 16) {

You can keep the comment:

// If range is 0: All identical - counting sort instant

Readability

When the number of operators starts adding up:

if (range < 32 || range < n / 8) {

I like to add more parentheses:

if ((range < 32) || (range < n / 8)) {

It helps a person group the terms without having to think as much.

Thanks for the valuable inputs, really appreciate! I've removed the redundant range check. Fixed documentation explaining 'BRE' stands for 'Bitwise Relationship Extraction', clarified comments.
We perhaps have different tastes in parens - probably I've had my fill of them when writing Lisp!

chux 36.4k2 gold badges43 silver badges97 bronze badges · Answer 4 · 2025-10-11 15:07:37Z

Compiler warnings

Code performs about 10 casual signed to/from unsigned conversions. Consider enabling more compiler warnings to identify.

Array sizing

As an alternative for qsort(), consider a signature more like void qsort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *));

For void BRESort_byte(unsigned char arr[], int n);, this would mean using size_t n instead of int n.

size_t is also the Goldilocks type for array sizing and indexing: not too narrow, not too wide.

This is also a fairer comparison against qsort(), which can handle arrays more than INT_MAX.

Since size_t is an unsigned type, code needs review to avoid negatives.

This also reduces the casual type conversions going on with mem...() calls.

Avoid naked magic numbers

Instead of values like 3, 8, 32, ... (with no explanation), use a self-documenting macro or named constant.

Use standard library ones when possible. E.g. rather than 255, UCHAR_MAX when assigning the maximal unsigned char, int count[256] = {0}; --> int count[UCHAR_MAX + 1] = {0};.

If code depends on an 8-bit char, use static_assert(CHAR_BIT == 8, "Must use 8-bit char");. I suspect CHAR_BIT <= 12 (or so) may be supportable.

Back up the claim

To avoid dismissing "3-4x faster than qsort", post deserves a test harness to demo it.

I could see this code using a dedicated sort function rather than qsort()'s function pointer accounts for a good deal of the improvement.

Organization

"SPECIALIZED BYTE DATA GENERATORS" deserve a separate .c file.

Minor: Argument order

(unsigned char arr[], int n) is more common that (int n, unsigned char arr[]) yet I find (int n, unsigned char arr[n]) or (int n, unsigned char arr[static n]) more useful as it allows various analysis inspections.

Really appreciate that you took your time to review the code! I will make these corrections in the full blown version of BRESort - this 'byte' version is a test run to assess the concept and get a reality check! The full version extends this intelligent approach to 32/64-bit integers and floating-point data.
As for the function pointer overhead in qsort, yeah... you're right. Changing my test
You were absolutely right about the function pointer overhead! My validation show significant gains in performance!
@LazyCauchPotato There are many was to sort FP given NaN. At least the sort criteria should explain how it handles NaNs.

Stack Exchange Network

My BRESort adaptive sorting engine in C

bresort.c

bresort.h

4 Answers 4

Documentation

Simpler

Readability

You must log in to answer this question.

Linked

Hot Network Questions

My BRESort adaptive sorting engine in C

bresort.c

bresort.h

4 Answers 4

Documentation

Simpler

Readability

You must log in to answer this question.

Linked

Related

Hot Network Questions