My implementation of 3 way quick sort
for strings.
It supposed to sort very large set of 800,000,000 objects.
This is why I added Lense
class.
Suppose you have object like this:
struct User{
int id;
std::string name;
};
Then Lense
class should be something like this:
struct UserLense{
auto const &operator()(User const &a) const{
return a.name;
}
};
I especially cared about NOT to overflow the stack.
I use shell sort
if recursion get too deep. I choose this sort because it is not recursive and is relatively fast.
I also kept the insertion sort
as well, only reason is - everyone doing it this way.
When I try with real data, I probably remove insertion sort
and replace it with shell sort
.
I tried to separate 3 way partitioning (Dutch National Flag) in separate function, but code become more unclear.
Do not like how tail recursion happen, but similar to 3 way partitioning, code become more unclear.
Thank you in advance!
Here is the code:
#include <string_view>
#include <cstdio> // also provide size_t
namespace three_way_quicksort_implementation_{
constexpr short CUTOFF_INS = 8;
constexpr short CUTOFF_DEEP = 32;
namespace{
template<typename It>
void swapIt(It a, It b){
using std::swap;
swap(*a, *b);
}
enum class M3{
a,
b,
c
};
template<typename T>
constexpr static M3 max3(T const &a, T const &b, T const &c){
if (a > b){
if (a > c)
return M3::a;
else
return M3::c;
}else{
if (b > c)
return M3::b;
else
return M3::c;
}
}
} // namespace
template<typename Lense>
struct Insertion_Sort{
template<typename It>
void operator()(It first, It last, size_t digit) const{
for (It i = first; i < last; ++i)
for (It j = i; j > first && compare(j, j - 1, digit); --j)
swapIt(j, j - 1);
}
private:
constexpr static auto subAt(std::string_view s, size_t digit){
return digit < s.size() ? s.substr(digit) : "";
}
template<typename It>
constexpr static bool compare(It a, It b, size_t digit){
Lense _;
return subAt(_(*a), digit) < subAt(_(*b), digit);
}
};
template<typename Lense>
struct Shell_Sort{
template<typename It>
void operator()(It first, It last, size_t digit) const{
size_t const size = std::distance(first, last);
size_t step = size - 1;
for(;;){
bool sorted = false;
while ( ! sorted ){
sorted = true;
for (size_t i = 0; i < size - step; ++i){
auto j = i + step;
if (compare(first + i, first + j, digit)){
swapIt(first + i, first + j);
sorted = false;
}
}
}
if (sorted && step == 1)
break;
step = step / 2;
if (step < 1)
step = 1;
}
}
private:
constexpr static auto subAt(std::string_view s, size_t digit){
return digit < s.size() ? s.substr(digit) : "";
}
template<typename It>
constexpr static bool compare(It a, It b, size_t digit){
Lense _;
return subAt(_(*a), digit) > subAt(_(*b), digit);
}
};
// Based on:
// https://stackoverflow.com/questions/6972635/efficient-string-sorting-algorithm
// https://www.geeksforgeeks.org/3-way-radix-quicksort-in-java/
template<typename Lense>
struct Quick3Way_Sort{
template<typename It>
void operator()(It first, It last, size_t digit, size_t deep) const{
return sort(first, last, digit, deep);
}
private:
template<typename It>
constexpr static void median(It first, It last, size_t digit){
It mid = first + ((last - first) >> 1);
if (charAt(mid, digit) < charAt(first, digit))
swapIt(first, mid);
if (charAt(mid, digit) < charAt(last, digit))
swapIt(last, mid);
if (charAt(first, digit) < charAt(last, digit))
swapIt(first, last);
}
template<typename It>
void sort(It first, It last, size_t digit, size_t deep) const{
++deep;
// controls tail recursion.
for(;;){
auto const distance = std::distance(first, last);
if (distance <= 1)
return;
if (distance <= CUTOFF_INS){
// printf("Cut Off: distance %zu\n", std::distance(first, last) );
Insertion_Sort<Lense>{}(first, last, digit);
}
if (deep > CUTOFF_DEEP){
// printf("Cut Off: too deep recursion %zu\n", std::distance(first, last) );
Shell_Sort<Lense>{}(first, last, digit);
}
auto lt = first;
auto gt = last - 1;
auto it = first + 1;
median(lt, gt, digit);
// partition
auto const pivot = charAt(lt, digit);
while (it <= gt) {
auto const t = charAt(it, digit);
if (t < pivot)
swapIt(lt++, it++);
else if (t > pivot)
swapIt(it, gt--);
else
++it;
}
// handle tail recursion
auto const max = max3(
std::distance(first, lt),
std::distance(lt, gt + 1),
std::distance(gt + 1, last)
);
switch(max){
case M3::a:
if (pivot >= 0)
sort(lt, gt + 1, digit + 1, deep);
sort(gt + 1, last, digit, deep);
// sort(first, lt, digit, deep);
// prepare tail recursion
last = lt;
break;
case M3::b:
sort(first, lt, digit, deep);
sort(gt + 1, last, digit, deep);
if (pivot >= 0){
// sort(lt, gt + 1, digit + 1);
// prepare tail recursion
first = lt;
last = gt + 1;
++digit;
break;
}
return;
case M3::c:
sort(first, lt, digit, deep);
if (pivot >= 0)
sort(lt, gt + 1, digit + 1, deep);
// sort(gt + 1, last, digit, deep);
// prepare tail recursion
first = gt + 1;
break;
}
}
}
private:
constexpr static int charAt(std::string_view s, size_t digit){
return digit < s.size() ? s[digit] : -1;
}
template<typename It>
constexpr static int charAt(It it, size_t digit){
Lense _;
return charAt(_(*it), digit);
}
};
template<typename Lense, typename It>
void doInsertionSort(It lo, It hi) {
size_t const digit = 0;
return Insertion_Sort<Lense>{}(lo, hi, digit);
}
template<typename Lense, typename It>
void doShellSort(It lo, It hi) {
size_t const digit = 0;
Shell_Sort<Lense>{}(lo, hi, digit);
}
template<typename Lense, typename It>
void doThreeWayQuickSort(It lo, It hi) {
size_t const digit = 0;
size_t const deep = 0;
Quick3Way_Sort<Lense>{}(lo, hi, digit, deep);
}
struct StandardLense{
template<typename T>
T const &operator()(T const &a) const{
return a;
}
};
}
template<typename Lense = three_way_quicksort_implementation_::StandardLense, typename It>
void threeWayQuickSort(It first, It last){
using namespace three_way_quicksort_implementation_;
// return doInsertionSort<Lense>(first, last);
// return doShellSort<Lense>(first, last);
return doThreeWayQuickSort<Lense>(first, last);
}
#include <algorithm>
#include <iostream>
template<typename T>
void testPrint(T const &data){
for(auto const &x : data)
std::cout << "- " << x << '\n';
std::cout << "--------------------------" << '\n';
}
// #include "sortdata.h"
// in real code I use array of 10,000 - 20,000 strings.
// you have to replace this part,
// else it will proceed with Insertion sort.
std::string_view data[] = {
"aaaa",
"zzzzzzzzzzz",
"xxxx",
"zzzzzzzzzzz123456789",
"bbbb",
};
int main(){
threeWayQuickSort(std::begin(data), std::end(data));
// std::sort(std::begin(data), std::end(data));
if constexpr(0)
testPrint(data);
bool const b = std::is_sorted(std::begin(data), std::end(data));
std::cout << ( b ? "OK" : "Error" ) << '\n';
if constexpr(0)
testPrint(data);
}
Update
I did put the sort in real world use:
The speedup with real world data is 32%.
The speedup with made up data is 27%.
Recursion of 32th level never happened and shell sort
never kick in. I guess I wont change it to non-recursive heap sort
.
Update
Final code is here:
https://github.com/nmmmnu/HM4/blob/master/include/threewayquicksort.h
When I use with real live data, this configuration kick recursion more than 16 only two times and shell sort
sort array of size 6-7 elements.
if insertion sort
is removed, the result have similar performance speed.
-
1\$\begingroup\$ Why shell sort when the recursion gets too deep, instead of the more usual heap sort? Was it faster in practice perhaps? They're both O(1) space, but shell sort has a less good worst-case time complexity \$\endgroup\$user555045– user5550452022年12月14日 04:27:22 +00:00Commented Dec 14, 2022 at 4:27
-
\$\begingroup\$ hmmm. may be you are right. I love shell sort and I had it already implemented. however heapsort is even easier, since std::stable_sort is heap sort, also there is make_heap / sort_heap. also heapsort is guaranteed O n log n \$\endgroup\$Nick– Nick2022年12月14日 08:57:23 +00:00Commented Dec 14, 2022 at 8:57
1 Answer 1
Library headers
We include <cstdio>
but never use it.
Conversely, we use std::cout
but never include <iostream>
.
std::size_t
is misspelt throughout the code.
With those issues fixed, the code compiles without warnings.
Terminology
The template type Lense
is called a projection in the Standard Library algorithms, including std::sort
. I recommend using the standard terminology so that users know what to expect.
Use the standard library
Instead of writing swapIt()
, just use std::iter_swap()
.
If we use std::invoke()
instead of calling the projection directly, we'll be able to accept pointers to data or function members as well as free functions - that's much more flexible, and what the standard library sort functions do.
Useless class
The three *_Sort
classes have no state; they don't need to be classes at all. Replacing with a template functions makes the code much simpler (and eliminates the need for the do*Sort()
functions).
-
\$\begingroup\$ Thanks a lot. stdio imports size_t. I always use size_t and not std:size_t. Projection - I agree, will rename it, also will use invoke. The classes has state - it is the template parameter. Also I expect that probably will need to pass the projection as constructor parameter, that depends when I use it in real world app (will happen very soon) \$\endgroup\$Nick– Nick2022年12月13日 17:45:35 +00:00Commented Dec 13, 2022 at 17:45
-
\$\begingroup\$ How did you compile the code to see if library is never used? Any compiler flag I am not aware of? \$\endgroup\$Nick– Nick2022年12月13日 17:47:47 +00:00Commented Dec 13, 2022 at 17:47
-
\$\begingroup\$ Just inspection - I fixed that and the
std::size_t
before I even attempted to compile. \$\endgroup\$Toby Speight– Toby Speight2022年12月13日 19:34:16 +00:00Commented Dec 13, 2022 at 19:34 -
\$\begingroup\$ The template parameter is not state; it is a part of the type (just as it is part of a template function). State is something that differs from instance to instance. Try it: you'll see that there's no need for those classes. \$\endgroup\$Toby Speight– Toby Speight2022年12月13日 19:36:55 +00:00Commented Dec 13, 2022 at 19:36
-
\$\begingroup\$
<cstdio>
declaresstd::size_t
. It's allowed to also definesize_t
, but that's not a requirement, so portable code cannot depend on that. \$\endgroup\$Toby Speight– Toby Speight2022年12月13日 19:51:50 +00:00Commented Dec 13, 2022 at 19:51
Explore related questions
See similar questions with these tags.