MSD Quick Radix Sort in Place in C++, Object/Pointer Oriented

Question 1

Memory: O(log2(max)*2)~O(1)
Speed: O(log2(max)*n)~O(n)

so i did before a MSD Radix Sort in place but i wanted to do one, with out the count, So i join together the quick sort and radix sort. Think as msd radix sort mod of 2.

Algorithm

i will explain this one directly with a table.

table quick radix sort

Code

start,finish,depth,mid are copy value because we can't modify because is a recursive funtion
s,f,mod_cal,auxiliar are static for fast calling and the memory usage

#pragma once
namespace leixor {
 //fast log in power of 2 we are using size so we can't use bit_width
 //is in excess on purpose
 static size_t& logb2(size_t num) {
 size_t aux = 1;
 if (num == 0) return (aux = -1);
 while (0 < (num >>= 1))
 aux++;
 return aux;
 }
 //------------------------obj version---------------------------
 //custom get_max because i don't wanna add lib,and normaly we will use this in a custom class
 template<class obj_arr>
 static size_t& get_max(obj_arr*& arr, const size_t& size) {
 
 size_t i = size%2, aux = arr[0].get_size_t(), aux2=-1;
 if (size == 0)return aux2;
 for (; i < size; i += 2) {
 aux2 = arr[i].get_size_t();
 aux = aux2 * (aux < aux2) + aux * (aux >= aux2);
 aux2 = arr[i + 1].get_size_t();
 aux = aux2 * (aux < aux2) + aux * (aux >= aux2);
 //aux= (aux < aux2)?aux2:aux; are the same speed as the lane before
 }
 return aux;
 }
 //----------------------------pointer obj version---------------------
 template<class obj_arr>
 static size_t& get_max(obj_arr**& arr, const size_t& size) {
 
 size_t i = size%2, aux = arr[0]->get_size_t(), aux2=-1;
 if (size == 0)return aux2;
 for (; i < size; i += 2) {
 aux2 = arr[i]->get_size_t();
 aux = aux2 * (aux < aux2) + aux * (aux >= aux2);
 aux2 = arr[i + 1]->get_size_t();
 aux = aux2 * (aux < aux2) + aux * (aux >= aux2);
 }
 return aux;
 }
 //---------------------------------------------------------------------
 //-----------------object orientedversion---------
 template<class obj_arr>
 void two_rad(obj_arr*& arr, size_t start, size_t finish, size_t depth) {
 depth--;
 static size_t s, f;
 static bool mod_cal;
 static obj_arr auxiliar;
 f = finish - 1; s = start;
 while (s < f) {
 auxiliar = arr[s];
 if (mod_cal=(auxiliar.get_size_t() >> depth) % 2) {
 arr[s] = arr[f];
 arr[f] = auxiliar;
 f--;
 }
 s+=!mod_cal;
 }
 size_t mid = s + !((arr[s].get_size_t() >> depth) % 2);
 if (depth > 0) {
 if (mid - start > 2)
 two_rad(arr, start, mid, depth);
 else if (mid - start > 1) {
 mod_cal = arr[start].get_size_t() > arr[start + 1].get_size_t();
 auxiliar = arr[start];
 arr[start] = arr[start + mod_cal];
 arr[start + mod_cal] = auxiliar;
 }
 if (finish - mid > 2)
 two_rad(arr, mid, finish, depth);
 else if (finish - mid > 1) {
 mod_cal = arr[mid].get_size_t() > arr[mid + 1].get_size_t();
 auxiliar = arr[mid];
 arr[mid] = arr[mid + mod_cal];
 arr[mid + mod_cal] = auxiliar;
 }
 }
 }
 template<class obj_arr>
 bool qick_radix_sort(obj_arr* arr, size_t size) {
 if (arr == nullptr || size < 2)
 return false;
 two_rad(arr, 0, size, logb2(get_max(arr, size)));
 return true;
 }
 //-----------------pointer orientedversion---------
 template<class obj_arr>
 void two_rad(obj_arr**& arr, size_t start, size_t finish, size_t depth) {
 depth--;
 static size_t s, f;
 static bool mod_cal;
 static obj_arr* auxiliar;
 
 f = finish - 1; s = start;
 while (s < f) {
 auxiliar = arr[s];
 if (mod_cal=(auxiliar->get_size_t() >> depth) % 2) {
 arr[s] = arr[f];
 arr[f] = auxiliar;
 f--;
 }
 s+=!mod_cal;
 }
 size_t mid =s+ !((arr[s]->get_size_t() >> depth)%2);
 if (depth>0){
 if (mid-start>2)
 two_rad(arr,start,mid,depth);
 else if (mid - start > 1) {
 mod_cal = arr[start]->get_size_t() > arr[start + 1]->get_size_t();
 
 auxiliar = arr[start];
 arr[start] = arr[start + mod_cal];
 arr[start + mod_cal] = auxiliar;
 }
 if (finish - mid > 2)
 two_rad(arr,mid,finish,depth);
 else if (finish - mid > 1) {
 mod_cal = arr[mid]->get_size_t() > arr[mid + 1]->get_size_t();
 auxiliar = arr[mid];
 arr[mid] = arr[mid + mod_cal];
 arr[mid + mod_cal] = auxiliar;
 }
 }
 }
 template<class obj_arr>
 bool qick_radix_sort(obj_arr** arr, size_t size) {
 if (arr == nullptr || size < 2)
 return false;
 two_rad(arr, 0, size, logb2(get_max(arr, size)));
 return true;
 }
}

needs

funtion inside a class as get_size_t return a size_t variable can be reference
and the calling of this sort is leixor::qick_radix_sort(arr, size);

ex:std::vector<D_CLASS> a; leixor::qick_radix_sort(a.data(), a.size());

Runtimes

the runtimes are in miliseconds, avarge of 100 times

optimization, new runtimes need it

/*
 --------------------------------------------------
 ----------------object oriented version-----------
 --------------------------------------------------
 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.002613|0.020518|0.216481|2.23518|22.4944|222.093|2197.06
 --------------------------------------------------
num|2 |0.003194|0.033248|0.31865|3.20488|31.2802|319.138|3135.21
 --------------------------------------------------
pow|3 |0.003617|0.039951|0.383426|3.9421|39.9787|419.901|4099.95
 --------------------------------------------------
2 |4 |0.003958|0.046783|0.478583|4.77747|47.9917|514.014|5001.75
 --------------------------------------------------
 |5 |0.003758|0.06327|0.604892|6.03168|58.432|579.918|5871.16
 --------------------------------------------------
 |6 |0.004298|0.064584|0.671869|6.92542|71.2283|703.449|6784.92
 --------------------------------------------------
 |7 |0.003727|0.062074|0.738229|7.47593|75.0941|752.692|7585.21
 --------------------------------------------------
 --------------------------------------------------
 ----------------pointer oriented version-----------
 --------------------------------------------------
 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.001209|0.006741|0.069374|0.888095|16.3528|186.809|1814.67
 --------------------------------------------------
num|2 |0.001486|0.010471|0.107467|1.23582|27.598|312.24|3067.28
 --------------------------------------------------
pow|3 |0.001785|0.014352|0.140364|1.69012|33.9956|428.07|4294.08
 --------------------------------------------------
2 |4 |0.002111|0.018542|0.176611|2.13729|39.5618|549.902|5661.17
 --------------------------------------------------
 |5 |0.002112|0.022938|0.216402|2.51519|50.7784|683.679|7022.75
 --------------------------------------------------
 |6 |0.002354|0.027141|0.260751|2.9174|57.0826|795.653|8816.75
 --------------------------------------------------
 |7 |0.00232|0.030715|0.290544|3.32639|61.756|917.006|10731
 --------------------------------------------------
*/

RunTime code

average of 100

#include "qick_radix_sort.h"
#include <stdlib.h>
#include <time.h> 
#include <iostream>
#include <chrono>
#include<string>
#include <array>
#include <math.h>
#include <bitset>
class data
{
public:
 data() {}
 data(size_t& A):m(A) {};
 size_t& get_size_t() { return m; };
 
 void operator = (data& A) { m = A.m; };
 void operator = (size_t& A) { m = A; };
 size_t m;
};
void object_version(const size_t pow10, const size_t pow2) {
 size_t size = pow(10, pow10);
 data* arr = new data[size];
 size_t mod = 1 << (2 + pow2);
 float mediana = 0.0;
 for (size_t i = 0; i < 100; i++)
 {
 for (size_t i = 0; i < size; i++) {
 arr[i].m = rand() % mod;
 }
 auto started = std::chrono::high_resolution_clock::now();
 leixor::qick_radix_sort(arr, size);
 auto done = std::chrono::high_resolution_clock::now();
 mediana += float(std::chrono::duration_cast<std::chrono::nanoseconds>(done - started).count()) / float(1000000);
 }
 std::cout << "|" << mediana /100;
 delete[] arr;
};
void pointer_object_version(const size_t pow10, const size_t pow2) {
 size_t size = pow(10, pow10);
 data** arr = new data * [size];
 size_t mod = 1 << (1+ pow2);
 float mediana = 0.0;
 for (size_t i = 0; i < size; i++) 
 arr[i] = new data();
 for (size_t i = 0; i < 100; i++)
 {
 for (size_t i = 0; i < size; i++) {
 arr[i]->m = rand() % mod;
 }
 auto started = std::chrono::high_resolution_clock::now();
 leixor::qick_radix_sort(arr, size);
 auto done = std::chrono::high_resolution_clock::now();
 mediana += float(std::chrono::duration_cast<std::chrono::nanoseconds>(done - started).count()) / float(1000000);
 }
 std::cout << "|" << mediana / 100;
 for (size_t i = 0; i < size; i++) {
 delete arr[i];
 }
 delete[] arr;
};
int main() {
 std::cout << "starting...\n";
 std::array<std::string, 7> alfa = { "\nmax|1 ","\nnum|2 ","\npow|3 ","\n2 |4 ","\n |5 " ,"\n |6 " ,"\n |7 " };
std::cout <<
 "\n --------------------------------------------------" <<
 "\n ----------------object oriented version-----------" <<
 "\n --------------------------------------------------\n\n";
 std::cout <<
 "\n ---------------------size pow 10------------------" <<
 "\n --------------------------------------------------" <<
 "\n | 1 | 2 | 3 | 4 | 5 | 6 | 7 |";
 for (size_t z = 0; z < 7; z++) {
 std::cout << "\n --------------------------------------------------" <<
 alfa[z];
 for (size_t i = 1; i < 8; i++) {
 object_version(i, z);
 }
 }
 std::cout << "\n --------------------------------------------------\n";
 std::cout <<
 "\n --------------------------------------------------" <<
 "\n ----------------pointer oriented version-----------" <<
 "\n --------------------------------------------------\n\n";
 std::cout <<
 "\n ---------------------size pow 10------------------" <<
 "\n --------------------------------------------------" <<
 "\n | 1 | 2 | 3 | 4 | 5 | 6 | 7 |";
 for (size_t z = 0; z < 7; z++) {
 std::cout << "\n --------------------------------------------------" <<
 alfa[z];
 for (size_t i = 1; i < 8; i++) {
 pointer_object_version(i, z);
 }
 }
 std::cout << "\n --------------------------------------------------\n";
}

Question 2

Review:

static size_t& logb2(size_t num) {
 size_t aux = 1;
 if (num == 0) return (aux = -1);
 while (0 < (num >>= 1))
 aux++;
 return aux;
}

This is returning a reference to a local object. aux goes out of scope at the end of the function, so there's no way that the calling function can use a reference to it.

It might work now, but on another compiler it might crash. It also might stop working (a crash, or worse: silently returning incorrect results) if you change your compiler options, or change your program slightly.

get_max has the same problem.

 size_t i = size%2, aux = arr[0].get_size_t(), aux2=-1;
 if (size == 0)return aux2;

Accessing arr[0] is not safe if size == 0, so the size check needs to be done first.

 static size_t s, f;
 static bool mod_cal;
 static obj_arr auxiliar;

These do not need to be static. Making them static is likely to make compiler optimization more difficult, and make the program slower.

 depth--;

It would be more usual to decrement depth when calling the next level of recursive function. (Which would mean we don't need the "off-by-one" log base 2 function).

 size_t mid =s+ !((arr[s]->get_size_t() >> depth)%2);
 if (depth>0){
 if (mid-start>2)
 two_rad(arr,start,mid,depth);
 else if (mid - start > 1) {
 mod_cal = arr[start]->get_size_t() > arr[start + 1]->get_size_t();
 
 auxiliar = arr[start];
 arr[start] = arr[start + mod_cal];
 arr[start + mod_cal] = auxiliar;
 }
 if (finish - mid > 2)
 two_rad(arr,mid,finish,depth);
 else if (finish - mid > 1) {
 mod_cal = arr[mid]->get_size_t() > arr[mid + 1]->get_size_t();
 auxiliar = arr[mid];
 arr[mid] = arr[mid + mod_cal];
 arr[mid + mod_cal] = auxiliar;
 }
 }

Most of this special-case logic is unnecessary. We can simply call the next level function, and check the size there before doing any work. i.e.:

// ... at the top of two_rad()
if (finish - start < 2) return; // nothing to do!
... processing ...
// ... at the bottom of two_rad():
if (depth == 0) return;
two_rad(arr, start, mid, depth - 1);
two_rad(arr, mid, finish, depth - 1);

Changes:

I made a series of changes to see what happened performance-wise. (Only looking at the "object-oriented" version).

Initial performance looked like this:

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000244|0.001618|0.014842|0.146165|1.44533
 --------------------------------------------------
num|2 |0.0003|0.002286|0.020734|0.210913|2.11932
 --------------------------------------------------
pow|3 |0.000343|0.003042|0.028168|0.279006|2.77773
 --------------------------------------------------
2 |4 |0.000356|0.003863|0.034078|0.345635|3.51738
 --------------------------------------------------
 |5 |0.000365|0.004637|0.040989|0.409529|4.11875
 --------------------------------------------------

Changes:

Removing data class, and taking an array of size_t instead of a template argument.
Using *std::max_element(arr, arr + size); instead of the custom get_max function.

Results: (pretty much the same)

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000215|0.001568|0.015363|0.146712|1.45304
 --------------------------------------------------
num|2 |0.000287|0.002171|0.020847|0.206735|2.07393
 --------------------------------------------------
pow|3 |0.000328|0.002852|0.026889|0.26842|2.70905
 --------------------------------------------------
2 |4 |0.000332|0.003548|0.033133|0.329062|3.28266
 --------------------------------------------------
 |5 |0.000344|0.004271|0.039608|0.390457|3.90385
 --------------------------------------------------

Changes:

Removing the unnecessary static variables.
Use std::swap.

Results: (pretty much the same)

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000225|0.00159|0.015389|0.146791|1.5249
 --------------------------------------------------
num|2 |0.000288|0.002196|0.020787|0.203742|2.06895
 --------------------------------------------------
pow|3 |0.000339|0.002963|0.028822|0.264389|2.66769
 --------------------------------------------------
2 |4 |0.000356|0.003552|0.033073|0.328369|3.27564
 --------------------------------------------------
 |5 |0.000348|0.004297|0.039552|0.387799|3.89064
 --------------------------------------------------

Changes:

Simplify the recursion (remove checking for size 1 or size 2 arrays as described above, just call the next two_rad function).

Results: (pretty much the same)

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000222|0.00158|0.015265|0.148654|1.46329
 --------------------------------------------------
num|2 |0.000286|0.002178|0.020024|0.203993|2.07686
 --------------------------------------------------
pow|3 |0.000377|0.002855|0.025927|0.27132|2.68528
 --------------------------------------------------
2 |4 |0.000504|0.003546|0.033194|0.329328|3.3009
 --------------------------------------------------
 |5 |0.000607|0.004372|0.038077|0.377845|3.9096
 --------------------------------------------------

Changes:

Switch to using iterators (pointers).
Change bit checking operation to (value & T{ 1 } << bit);

Results: (slightly faster)

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000192|0.001295|0.013703|0.137851|1.25551
 --------------------------------------------------
num|2 |0.000288|0.001852|0.017461|0.1866|1.73913
 --------------------------------------------------
pow|3 |0.000376|0.002432|0.02283|0.222846|2.25643
 --------------------------------------------------
2 |4 |0.000469|0.003089|0.027707|0.272459|2.7158
 --------------------------------------------------
 |5 |0.00058|0.003853|0.033452|0.32207|3.27698
 --------------------------------------------------

With this last version, we have one less function argument to pass in two_rad, so it's slightly faster because of that. The alternative method of checking the set bit is also slightly faster.

The final code looked like this:

#include <algorithm>
#include <bit>
#include <cmath>
#include <type_traits>
template<class T>
bool is_bit_set(T value, T bit)
{
 return value & (T{ 1 } << bit);
 //return ((value >> bit) % 2);
}
template<class It>
void two_rad(It begin, It end, typename std::iterator_traits<It>::value_type bit)
{
 if (begin == end) return;
 auto s = begin;
 auto f = std::prev(end);
 while (s != f)
 {
 if (is_bit_set(*s, bit))
 {
 std::swap(*s, *f);
 --f;
 }
 else
 ++s;
 }
 if (bit == 0) return;
 if (!is_bit_set(*s, bit))
 ++s;
 two_rad(begin, s, bit - 1);
 two_rad(s, end, bit - 1);
}
template<class It,
 class = std::enable_if_t<std::is_unsigned_v<typename std::iterator_traits<It>::value_type>> >
void quick_radix_sort(It begin, It end)
{
 if (begin == end) return;
 auto max = *std::max_element(begin, end);
 if (max == 0) return;
 auto max_bit = std::bit_width(max) - typename std::iterator_traits<It>::value_type{ 1 };
 two_rad(begin, end, max_bit);
}

Question 3

one question if a have a class like idk point with x,y and z how i implement that here?

Question 4

OPs comment that "not using std::swap because it's slow" immediately raised red flags for the kind of attitude where everything someone else or the standard library made must be slow because they didn't make it then selves... Thanks for setting the record straight with respect to std::swap.

user673679 user673679 12.2k2 gold badges34 silver badges65 bronze badges · Accepted Answer · 2021-02-06 16:56:23Z

Review:

static size_t& logb2(size_t num) {
 size_t aux = 1;
 if (num == 0) return (aux = -1);
 while (0 < (num >>= 1))
 aux++;
 return aux;
}

This is returning a reference to a local object. aux goes out of scope at the end of the function, so there's no way that the calling function can use a reference to it.

It might work now, but on another compiler it might crash. It also might stop working (a crash, or worse: silently returning incorrect results) if you change your compiler options, or change your program slightly.

get_max has the same problem.

 size_t i = size%2, aux = arr[0].get_size_t(), aux2=-1;
 if (size == 0)return aux2;

Accessing arr[0] is not safe if size == 0, so the size check needs to be done first.

 static size_t s, f;
 static bool mod_cal;
 static obj_arr auxiliar;

These do not need to be static. Making them static is likely to make compiler optimization more difficult, and make the program slower.

 depth--;

It would be more usual to decrement depth when calling the next level of recursive function. (Which would mean we don't need the "off-by-one" log base 2 function).

 size_t mid =s+ !((arr[s]->get_size_t() >> depth)%2);
 if (depth>0){
 if (mid-start>2)
 two_rad(arr,start,mid,depth);
 else if (mid - start > 1) {
 mod_cal = arr[start]->get_size_t() > arr[start + 1]->get_size_t();
 
 auxiliar = arr[start];
 arr[start] = arr[start + mod_cal];
 arr[start + mod_cal] = auxiliar;
 }
 if (finish - mid > 2)
 two_rad(arr,mid,finish,depth);
 else if (finish - mid > 1) {
 mod_cal = arr[mid]->get_size_t() > arr[mid + 1]->get_size_t();
 auxiliar = arr[mid];
 arr[mid] = arr[mid + mod_cal];
 arr[mid + mod_cal] = auxiliar;
 }
 }

Most of this special-case logic is unnecessary. We can simply call the next level function, and check the size there before doing any work. i.e.:

// ... at the top of two_rad()
if (finish - start < 2) return; // nothing to do!
... processing ...
// ... at the bottom of two_rad():
if (depth == 0) return;
two_rad(arr, start, mid, depth - 1);
two_rad(arr, mid, finish, depth - 1);

Changes:

I made a series of changes to see what happened performance-wise. (Only looking at the "object-oriented" version).

Initial performance looked like this:

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000244|0.001618|0.014842|0.146165|1.44533
 --------------------------------------------------
num|2 |0.0003|0.002286|0.020734|0.210913|2.11932
 --------------------------------------------------
pow|3 |0.000343|0.003042|0.028168|0.279006|2.77773
 --------------------------------------------------
2 |4 |0.000356|0.003863|0.034078|0.345635|3.51738
 --------------------------------------------------
 |5 |0.000365|0.004637|0.040989|0.409529|4.11875
 --------------------------------------------------

Changes:

Removing data class, and taking an array of size_t instead of a template argument.
Using *std::max_element(arr, arr + size); instead of the custom get_max function.

Results: (pretty much the same)

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000215|0.001568|0.015363|0.146712|1.45304
 --------------------------------------------------
num|2 |0.000287|0.002171|0.020847|0.206735|2.07393
 --------------------------------------------------
pow|3 |0.000328|0.002852|0.026889|0.26842|2.70905
 --------------------------------------------------
2 |4 |0.000332|0.003548|0.033133|0.329062|3.28266
 --------------------------------------------------
 |5 |0.000344|0.004271|0.039608|0.390457|3.90385
 --------------------------------------------------

Changes:

Removing the unnecessary static variables.
Use std::swap.

Results: (pretty much the same)

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000225|0.00159|0.015389|0.146791|1.5249
 --------------------------------------------------
num|2 |0.000288|0.002196|0.020787|0.203742|2.06895
 --------------------------------------------------
pow|3 |0.000339|0.002963|0.028822|0.264389|2.66769
 --------------------------------------------------
2 |4 |0.000356|0.003552|0.033073|0.328369|3.27564
 --------------------------------------------------
 |5 |0.000348|0.004297|0.039552|0.387799|3.89064
 --------------------------------------------------

Changes:

Simplify the recursion (remove checking for size 1 or size 2 arrays as described above, just call the next two_rad function).

Results: (pretty much the same)

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000222|0.00158|0.015265|0.148654|1.46329
 --------------------------------------------------
num|2 |0.000286|0.002178|0.020024|0.203993|2.07686
 --------------------------------------------------
pow|3 |0.000377|0.002855|0.025927|0.27132|2.68528
 --------------------------------------------------
2 |4 |0.000504|0.003546|0.033194|0.329328|3.3009
 --------------------------------------------------
 |5 |0.000607|0.004372|0.038077|0.377845|3.9096
 --------------------------------------------------

Changes:

Switch to using iterators (pointers).
Change bit checking operation to (value & T{ 1 } << bit);

Results: (slightly faster)

 ---------------------size pow 10------------------
 --------------------------------------------------
 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 --------------------------------------------------
max|1 |0.000192|0.001295|0.013703|0.137851|1.25551
 --------------------------------------------------
num|2 |0.000288|0.001852|0.017461|0.1866|1.73913
 --------------------------------------------------
pow|3 |0.000376|0.002432|0.02283|0.222846|2.25643
 --------------------------------------------------
2 |4 |0.000469|0.003089|0.027707|0.272459|2.7158
 --------------------------------------------------
 |5 |0.00058|0.003853|0.033452|0.32207|3.27698
 --------------------------------------------------

With this last version, we have one less function argument to pass in two_rad, so it's slightly faster because of that. The alternative method of checking the set bit is also slightly faster.

The final code looked like this:

#include <algorithm>
#include <bit>
#include <cmath>
#include <type_traits>
template<class T>
bool is_bit_set(T value, T bit)
{
 return value & (T{ 1 } << bit);
 //return ((value >> bit) % 2);
}
template<class It>
void two_rad(It begin, It end, typename std::iterator_traits<It>::value_type bit)
{
 if (begin == end) return;
 auto s = begin;
 auto f = std::prev(end);
 while (s != f)
 {
 if (is_bit_set(*s, bit))
 {
 std::swap(*s, *f);
 --f;
 }
 else
 ++s;
 }
 if (bit == 0) return;
 if (!is_bit_set(*s, bit))
 ++s;
 two_rad(begin, s, bit - 1);
 two_rad(s, end, bit - 1);
}
template<class It,
 class = std::enable_if_t<std::is_unsigned_v<typename std::iterator_traits<It>::value_type>> >
void quick_radix_sort(It begin, It end)
{
 if (begin == end) return;
 auto max = *std::max_element(begin, end);
 if (max == 0) return;
 auto max_bit = std::bit_width(max) - typename std::iterator_traits<It>::value_type{ 1 };
 two_rad(begin, end, max_bit);
}

one question if a have a class like idk point with x,y and z how i implement that here?
OPs comment that "not using std::swap because it's slow" immediately raised red flags for the kind of attitude where everything someone else or the standard library made must be slow because they didn't make it then selves... Thanks for setting the record straight with respect to std::swap.

Stack Exchange Network

MSD Quick Radix Sort in Place in C++, Object/Pointer Oriented

Algorithm

Code

needs

Runtimes

RunTime code

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Hot Network Questions

MSD Quick Radix Sort in Place in C++, Object/Pointer Oriented

Algorithm

Code

needs

Runtimes

RunTime code

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Related

Hot Network Questions