Memory: O(log2(max)*2)~O(1)
Speed: O(log2(max)*n)~O(n)
so i did before a MSD Radix Sort in place but i wanted to do one, with out the count, So i join together the quick sort and radix sort. Think as msd radix sort mod of 2.
Algorithm
i will explain this one directly with a table.
Code
start,finish,depth,mid are copy value because we can't modify because is a recursive funtion
s,f,mod_cal,auxiliar are static for fast calling and the memory usage
#pragma once
namespace leixor {
//fast log in power of 2 we are using size so we can't use bit_width
//is in excess on purpose
static size_t& logb2(size_t num) {
size_t aux = 1;
if (num == 0) return (aux = -1);
while (0 < (num >>= 1))
aux++;
return aux;
}
//------------------------obj version---------------------------
//custom get_max because i don't wanna add lib,and normaly we will use this in a custom class
template<class obj_arr>
static size_t& get_max(obj_arr*& arr, const size_t& size) {
size_t i = size%2, aux = arr[0].get_size_t(), aux2=-1;
if (size == 0)return aux2;
for (; i < size; i += 2) {
aux2 = arr[i].get_size_t();
aux = aux2 * (aux < aux2) + aux * (aux >= aux2);
aux2 = arr[i + 1].get_size_t();
aux = aux2 * (aux < aux2) + aux * (aux >= aux2);
//aux= (aux < aux2)?aux2:aux; are the same speed as the lane before
}
return aux;
}
//----------------------------pointer obj version---------------------
template<class obj_arr>
static size_t& get_max(obj_arr**& arr, const size_t& size) {
size_t i = size%2, aux = arr[0]->get_size_t(), aux2=-1;
if (size == 0)return aux2;
for (; i < size; i += 2) {
aux2 = arr[i]->get_size_t();
aux = aux2 * (aux < aux2) + aux * (aux >= aux2);
aux2 = arr[i + 1]->get_size_t();
aux = aux2 * (aux < aux2) + aux * (aux >= aux2);
}
return aux;
}
//---------------------------------------------------------------------
//-----------------object orientedversion---------
template<class obj_arr>
void two_rad(obj_arr*& arr, size_t start, size_t finish, size_t depth) {
depth--;
static size_t s, f;
static bool mod_cal;
static obj_arr auxiliar;
f = finish - 1; s = start;
while (s < f) {
auxiliar = arr[s];
if (mod_cal=(auxiliar.get_size_t() >> depth) % 2) {
arr[s] = arr[f];
arr[f] = auxiliar;
f--;
}
s+=!mod_cal;
}
size_t mid = s + !((arr[s].get_size_t() >> depth) % 2);
if (depth > 0) {
if (mid - start > 2)
two_rad(arr, start, mid, depth);
else if (mid - start > 1) {
mod_cal = arr[start].get_size_t() > arr[start + 1].get_size_t();
auxiliar = arr[start];
arr[start] = arr[start + mod_cal];
arr[start + mod_cal] = auxiliar;
}
if (finish - mid > 2)
two_rad(arr, mid, finish, depth);
else if (finish - mid > 1) {
mod_cal = arr[mid].get_size_t() > arr[mid + 1].get_size_t();
auxiliar = arr[mid];
arr[mid] = arr[mid + mod_cal];
arr[mid + mod_cal] = auxiliar;
}
}
}
template<class obj_arr>
bool qick_radix_sort(obj_arr* arr, size_t size) {
if (arr == nullptr || size < 2)
return false;
two_rad(arr, 0, size, logb2(get_max(arr, size)));
return true;
}
//-----------------pointer orientedversion---------
template<class obj_arr>
void two_rad(obj_arr**& arr, size_t start, size_t finish, size_t depth) {
depth--;
static size_t s, f;
static bool mod_cal;
static obj_arr* auxiliar;
f = finish - 1; s = start;
while (s < f) {
auxiliar = arr[s];
if (mod_cal=(auxiliar->get_size_t() >> depth) % 2) {
arr[s] = arr[f];
arr[f] = auxiliar;
f--;
}
s+=!mod_cal;
}
size_t mid =s+ !((arr[s]->get_size_t() >> depth)%2);
if (depth>0){
if (mid-start>2)
two_rad(arr,start,mid,depth);
else if (mid - start > 1) {
mod_cal = arr[start]->get_size_t() > arr[start + 1]->get_size_t();
auxiliar = arr[start];
arr[start] = arr[start + mod_cal];
arr[start + mod_cal] = auxiliar;
}
if (finish - mid > 2)
two_rad(arr,mid,finish,depth);
else if (finish - mid > 1) {
mod_cal = arr[mid]->get_size_t() > arr[mid + 1]->get_size_t();
auxiliar = arr[mid];
arr[mid] = arr[mid + mod_cal];
arr[mid + mod_cal] = auxiliar;
}
}
}
template<class obj_arr>
bool qick_radix_sort(obj_arr** arr, size_t size) {
if (arr == nullptr || size < 2)
return false;
two_rad(arr, 0, size, logb2(get_max(arr, size)));
return true;
}
}
needs
funtion inside a class as get_size_t return a size_t variable can be reference
and the calling of this sort is leixor::qick_radix_sort(arr, size);
ex:std::vector<D_CLASS> a; leixor::qick_radix_sort(a.data(), a.size());
Runtimes
the runtimes are in miliseconds, avarge of 100 times
optimization, new runtimes need it
/*
--------------------------------------------------
----------------object oriented version-----------
--------------------------------------------------
---------------------size pow 10------------------
--------------------------------------------------
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
--------------------------------------------------
max|1 |0.002613|0.020518|0.216481|2.23518|22.4944|222.093|2197.06
--------------------------------------------------
num|2 |0.003194|0.033248|0.31865|3.20488|31.2802|319.138|3135.21
--------------------------------------------------
pow|3 |0.003617|0.039951|0.383426|3.9421|39.9787|419.901|4099.95
--------------------------------------------------
2 |4 |0.003958|0.046783|0.478583|4.77747|47.9917|514.014|5001.75
--------------------------------------------------
|5 |0.003758|0.06327|0.604892|6.03168|58.432|579.918|5871.16
--------------------------------------------------
|6 |0.004298|0.064584|0.671869|6.92542|71.2283|703.449|6784.92
--------------------------------------------------
|7 |0.003727|0.062074|0.738229|7.47593|75.0941|752.692|7585.21
--------------------------------------------------
--------------------------------------------------
----------------pointer oriented version-----------
--------------------------------------------------
---------------------size pow 10------------------
--------------------------------------------------
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
--------------------------------------------------
max|1 |0.001209|0.006741|0.069374|0.888095|16.3528|186.809|1814.67
--------------------------------------------------
num|2 |0.001486|0.010471|0.107467|1.23582|27.598|312.24|3067.28
--------------------------------------------------
pow|3 |0.001785|0.014352|0.140364|1.69012|33.9956|428.07|4294.08
--------------------------------------------------
2 |4 |0.002111|0.018542|0.176611|2.13729|39.5618|549.902|5661.17
--------------------------------------------------
|5 |0.002112|0.022938|0.216402|2.51519|50.7784|683.679|7022.75
--------------------------------------------------
|6 |0.002354|0.027141|0.260751|2.9174|57.0826|795.653|8816.75
--------------------------------------------------
|7 |0.00232|0.030715|0.290544|3.32639|61.756|917.006|10731
--------------------------------------------------
*/
RunTime code
average of 100
#include "qick_radix_sort.h"
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <chrono>
#include<string>
#include <array>
#include <math.h>
#include <bitset>
class data
{
public:
data() {}
data(size_t& A):m(A) {};
size_t& get_size_t() { return m; };
void operator = (data& A) { m = A.m; };
void operator = (size_t& A) { m = A; };
size_t m;
};
void object_version(const size_t pow10, const size_t pow2) {
size_t size = pow(10, pow10);
data* arr = new data[size];
size_t mod = 1 << (2 + pow2);
float mediana = 0.0;
for (size_t i = 0; i < 100; i++)
{
for (size_t i = 0; i < size; i++) {
arr[i].m = rand() % mod;
}
auto started = std::chrono::high_resolution_clock::now();
leixor::qick_radix_sort(arr, size);
auto done = std::chrono::high_resolution_clock::now();
mediana += float(std::chrono::duration_cast<std::chrono::nanoseconds>(done - started).count()) / float(1000000);
}
std::cout << "|" << mediana /100;
delete[] arr;
};
void pointer_object_version(const size_t pow10, const size_t pow2) {
size_t size = pow(10, pow10);
data** arr = new data * [size];
size_t mod = 1 << (1+ pow2);
float mediana = 0.0;
for (size_t i = 0; i < size; i++)
arr[i] = new data();
for (size_t i = 0; i < 100; i++)
{
for (size_t i = 0; i < size; i++) {
arr[i]->m = rand() % mod;
}
auto started = std::chrono::high_resolution_clock::now();
leixor::qick_radix_sort(arr, size);
auto done = std::chrono::high_resolution_clock::now();
mediana += float(std::chrono::duration_cast<std::chrono::nanoseconds>(done - started).count()) / float(1000000);
}
std::cout << "|" << mediana / 100;
for (size_t i = 0; i < size; i++) {
delete arr[i];
}
delete[] arr;
};
int main() {
std::cout << "starting...\n";
std::array<std::string, 7> alfa = { "\nmax|1 ","\nnum|2 ","\npow|3 ","\n2 |4 ","\n |5 " ,"\n |6 " ,"\n |7 " };
std::cout <<
"\n --------------------------------------------------" <<
"\n ----------------object oriented version-----------" <<
"\n --------------------------------------------------\n\n";
std::cout <<
"\n ---------------------size pow 10------------------" <<
"\n --------------------------------------------------" <<
"\n | 1 | 2 | 3 | 4 | 5 | 6 | 7 |";
for (size_t z = 0; z < 7; z++) {
std::cout << "\n --------------------------------------------------" <<
alfa[z];
for (size_t i = 1; i < 8; i++) {
object_version(i, z);
}
}
std::cout << "\n --------------------------------------------------\n";
std::cout <<
"\n --------------------------------------------------" <<
"\n ----------------pointer oriented version-----------" <<
"\n --------------------------------------------------\n\n";
std::cout <<
"\n ---------------------size pow 10------------------" <<
"\n --------------------------------------------------" <<
"\n | 1 | 2 | 3 | 4 | 5 | 6 | 7 |";
for (size_t z = 0; z < 7; z++) {
std::cout << "\n --------------------------------------------------" <<
alfa[z];
for (size_t i = 1; i < 8; i++) {
pointer_object_version(i, z);
}
}
std::cout << "\n --------------------------------------------------\n";
}
1 Answer 1
Review:
static size_t& logb2(size_t num) {
size_t aux = 1;
if (num == 0) return (aux = -1);
while (0 < (num >>= 1))
aux++;
return aux;
}
This is returning a reference to a local object. aux
goes out of scope at the end of the function, so there's no way that the calling function can use a reference to it.
It might work now, but on another compiler it might crash. It also might stop working (a crash, or worse: silently returning incorrect results) if you change your compiler options, or change your program slightly.
get_max
has the same problem.
size_t i = size%2, aux = arr[0].get_size_t(), aux2=-1;
if (size == 0)return aux2;
Accessing arr[0]
is not safe if size == 0
, so the size check needs to be done first.
static size_t s, f;
static bool mod_cal;
static obj_arr auxiliar;
These do not need to be static
. Making them static is likely to make compiler optimization more difficult, and make the program slower.
depth--;
It would be more usual to decrement depth when calling the next level of recursive function. (Which would mean we don't need the "off-by-one" log base 2 function).
size_t mid =s+ !((arr[s]->get_size_t() >> depth)%2);
if (depth>0){
if (mid-start>2)
two_rad(arr,start,mid,depth);
else if (mid - start > 1) {
mod_cal = arr[start]->get_size_t() > arr[start + 1]->get_size_t();
auxiliar = arr[start];
arr[start] = arr[start + mod_cal];
arr[start + mod_cal] = auxiliar;
}
if (finish - mid > 2)
two_rad(arr,mid,finish,depth);
else if (finish - mid > 1) {
mod_cal = arr[mid]->get_size_t() > arr[mid + 1]->get_size_t();
auxiliar = arr[mid];
arr[mid] = arr[mid + mod_cal];
arr[mid + mod_cal] = auxiliar;
}
}
Most of this special-case logic is unnecessary. We can simply call the next level function, and check the size there before doing any work. i.e.:
// ... at the top of two_rad()
if (finish - start < 2) return; // nothing to do!
... processing ...
// ... at the bottom of two_rad():
if (depth == 0) return;
two_rad(arr, start, mid, depth - 1);
two_rad(arr, mid, finish, depth - 1);
Changes:
I made a series of changes to see what happened performance-wise. (Only looking at the "object-oriented" version).
Initial performance looked like this:
---------------------size pow 10------------------
--------------------------------------------------
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
--------------------------------------------------
max|1 |0.000244|0.001618|0.014842|0.146165|1.44533
--------------------------------------------------
num|2 |0.0003|0.002286|0.020734|0.210913|2.11932
--------------------------------------------------
pow|3 |0.000343|0.003042|0.028168|0.279006|2.77773
--------------------------------------------------
2 |4 |0.000356|0.003863|0.034078|0.345635|3.51738
--------------------------------------------------
|5 |0.000365|0.004637|0.040989|0.409529|4.11875
--------------------------------------------------
Changes:
- Removing
data
class, and taking an array ofsize_t
instead of a template argument. - Using
*std::max_element(arr, arr + size);
instead of the customget_max
function.
Results: (pretty much the same)
---------------------size pow 10------------------
--------------------------------------------------
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
--------------------------------------------------
max|1 |0.000215|0.001568|0.015363|0.146712|1.45304
--------------------------------------------------
num|2 |0.000287|0.002171|0.020847|0.206735|2.07393
--------------------------------------------------
pow|3 |0.000328|0.002852|0.026889|0.26842|2.70905
--------------------------------------------------
2 |4 |0.000332|0.003548|0.033133|0.329062|3.28266
--------------------------------------------------
|5 |0.000344|0.004271|0.039608|0.390457|3.90385
--------------------------------------------------
Changes:
- Removing the unnecessary
static
variables. - Use
std::swap
.
Results: (pretty much the same)
---------------------size pow 10------------------
--------------------------------------------------
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
--------------------------------------------------
max|1 |0.000225|0.00159|0.015389|0.146791|1.5249
--------------------------------------------------
num|2 |0.000288|0.002196|0.020787|0.203742|2.06895
--------------------------------------------------
pow|3 |0.000339|0.002963|0.028822|0.264389|2.66769
--------------------------------------------------
2 |4 |0.000356|0.003552|0.033073|0.328369|3.27564
--------------------------------------------------
|5 |0.000348|0.004297|0.039552|0.387799|3.89064
--------------------------------------------------
Changes:
- Simplify the recursion (remove checking for size 1 or size 2 arrays as described above, just call the next
two_rad
function).
Results: (pretty much the same)
---------------------size pow 10------------------
--------------------------------------------------
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
--------------------------------------------------
max|1 |0.000222|0.00158|0.015265|0.148654|1.46329
--------------------------------------------------
num|2 |0.000286|0.002178|0.020024|0.203993|2.07686
--------------------------------------------------
pow|3 |0.000377|0.002855|0.025927|0.27132|2.68528
--------------------------------------------------
2 |4 |0.000504|0.003546|0.033194|0.329328|3.3009
--------------------------------------------------
|5 |0.000607|0.004372|0.038077|0.377845|3.9096
--------------------------------------------------
Changes:
- Switch to using iterators (pointers).
- Change bit checking operation to
(value & T{ 1 } << bit);
Results: (slightly faster)
---------------------size pow 10------------------
--------------------------------------------------
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
--------------------------------------------------
max|1 |0.000192|0.001295|0.013703|0.137851|1.25551
--------------------------------------------------
num|2 |0.000288|0.001852|0.017461|0.1866|1.73913
--------------------------------------------------
pow|3 |0.000376|0.002432|0.02283|0.222846|2.25643
--------------------------------------------------
2 |4 |0.000469|0.003089|0.027707|0.272459|2.7158
--------------------------------------------------
|5 |0.00058|0.003853|0.033452|0.32207|3.27698
--------------------------------------------------
With this last version, we have one less function argument to pass in two_rad
, so it's slightly faster because of that. The alternative method of checking the set bit is also slightly faster.
The final code looked like this:
#include <algorithm>
#include <bit>
#include <cmath>
#include <type_traits>
template<class T>
bool is_bit_set(T value, T bit)
{
return value & (T{ 1 } << bit);
//return ((value >> bit) % 2);
}
template<class It>
void two_rad(It begin, It end, typename std::iterator_traits<It>::value_type bit)
{
if (begin == end) return;
auto s = begin;
auto f = std::prev(end);
while (s != f)
{
if (is_bit_set(*s, bit))
{
std::swap(*s, *f);
--f;
}
else
++s;
}
if (bit == 0) return;
if (!is_bit_set(*s, bit))
++s;
two_rad(begin, s, bit - 1);
two_rad(s, end, bit - 1);
}
template<class It,
class = std::enable_if_t<std::is_unsigned_v<typename std::iterator_traits<It>::value_type>> >
void quick_radix_sort(It begin, It end)
{
if (begin == end) return;
auto max = *std::max_element(begin, end);
if (max == 0) return;
auto max_bit = std::bit_width(max) - typename std::iterator_traits<It>::value_type{ 1 };
two_rad(begin, end, max_bit);
}
-
\$\begingroup\$ one question if a have a class like idk point with x,y and z how i implement that here? \$\endgroup\$bigotes0invisibles– bigotes0invisibles2021年02月13日 07:01:50 +00:00Commented Feb 13, 2021 at 7:01
-
2\$\begingroup\$ OPs comment that "not using std::swap because it's slow" immediately raised red flags for the kind of attitude where everything someone else or the standard library made must be slow because they didn't make it then selves... Thanks for setting the record straight with respect to std::swap. \$\endgroup\$Emily L.– Emily L.2021年02月13日 09:52:20 +00:00Commented Feb 13, 2021 at 9:52
Explore related questions
See similar questions with these tags.