Skip to main content
Code Review

Return to Question

replaced http://codereview.stackexchange.com/ with https://codereview.stackexchange.com/
Source Link

The previous iteration at Natural merge sort Natural merge sort

The previous iteration at Natural merge sort

The previous iteration at Natural merge sort

edited tags
Link
Jamal
  • 35.2k
  • 13
  • 134
  • 238
Source Link
coderodde
  • 31.8k
  • 15
  • 77
  • 202

Natural merge sort - follow-up

The previous iteration at Natural merge sort

Now my code relies on C++14 and looks like:

natural_merge_sort.h:

#ifndef NATURAL_MERGE_SORT_H
#define NATURAL_MERGE_SORT_H
#include <algorithm>
#include <iterator>
/*******************************************************************************
* Implements a simple, array-based queue of integers. All three operations run *
* in constant time. This queue, however, does not check for under-/overflow of *
* underlying buffer because of performance considerations. *
*******************************************************************************/
class UnsafeIntQueue {
private:
 const size_t MINIMUM_CAPACITY = 256;
 size_t m_head;
 size_t m_tail;
 size_t m_size;
 size_t m_mask;
 size_t* m_buffer;
 /***************************************************************************
 * Makes sure a capacity is at least 'MINIMUM_CAPACITY' and is a power of *
 * two. *
 ***************************************************************************/
 size_t fixCapacity(size_t capacity)
 {
 capacity = std::max(capacity, MINIMUM_CAPACITY);
 size_t s = 1;
 while (s < capacity)
 {
 s <<= 1;
 }
 return s;
 }
public:
 /***************************************************************************
 * Constructs a new integer queue, which can accommodate 'capacit' amount *
 * integers. *
 ***************************************************************************/
 UnsafeIntQueue(size_t capacity) :
 m_head{0},
 m_tail{0},
 m_size{0}
 {
 capacity = fixCapacity(capacity);
 m_mask = capacity - 1;
 m_buffer = new size_t[capacity];
 }
 /***************************************************************************
 * Destroys this queue, which releases the underlying buffer. *
 ***************************************************************************/
 ~UnsafeIntQueue()
 {
 delete[] m_buffer;
 }
 /***************************************************************************
 * Appends the input integer to the tail of this queue. *
 ***************************************************************************/
 inline void enqueue(const size_t element)
 {
 m_buffer[m_tail & m_mask] = element;
 m_tail = (m_tail + 1) & m_mask;
 m_size++;
 }
 /***************************************************************************
 * Removes and returns the integer at the head of this queue. *
 ***************************************************************************/
 inline size_t dequeue()
 {
 const size_t ret = m_buffer[m_head];
 m_head = (m_head + 1) & m_mask;
 m_size--;
 return ret;
 }
 /***************************************************************************
 * Returns the amount of integers in this queue. *
 ***************************************************************************/
 inline size_t size() const
 {
 return m_size;
 }
};
/*******************************************************************************
* Scans the range [first, lst) and returns the queue containing sizes of each *
* run in the order they appear while scanning from left to right. *
*******************************************************************************/
template<class RandomIt, class Cmp>
std::unique_ptr<UnsafeIntQueue> build_run_size_queue(RandomIt first,
 RandomIt lst,
 Cmp cmp)
{
 const size_t length = std::distance(first, lst);
 UnsafeIntQueue* p_q = new UnsafeIntQueue(length / 2 + 1);
 RandomIt head;
 RandomIt left = first;
 RandomIt right = left + 1;
 const RandomIt last = lst - 1;
 while (left < last)
 {
 head = left;
 if (cmp(*right++, *left++))
 {
 // Reading a strictly descending run.
 while (left < last && cmp(*right, *left))
 {
 ++left;
 ++right;
 }
 p_q->enqueue(right - head);
 std::reverse(head, right);
 }
 else
 {
 // Reading a ascending run.
 while (left < last && !cmp(*right, *left))
 {
 ++left;
 ++right;
 }
 p_q->enqueue(left - head + 1);
 }
 ++left;
 ++right;
 }
 if (left == last)
 {
 // Handle the case of an orphan element at the end of the range.
 p_q->enqueue(1);
 }
 return std::unique_ptr<UnsafeIntQueue>(p_q);
}
/*******************************************************************************
* Returns the amount of leading zeros in 'num'. *
*******************************************************************************/
size_t leading_zeros(const size_t num)
{
 size_t count = 0;
 for (size_t t = (size_t) 1 << (8 * sizeof(t) - 1); t; t >>= 1, ++count)
 {
 if ((t & num))
 {
 return count;
 }
 }
 return count;
}
/*******************************************************************************
* Returns the amount of merge passes needed to sort a range with 'run_amount' *
* runs. *
*******************************************************************************/
size_t get_pass_amount(size_t run_amount)
{
 return 8 * sizeof(run_amount) - leading_zeros(run_amount - 1);
}
/*******************************************************************************
* Implements the natural merge sort, which sacrifices one pass over the input *
* range in order to establish an implicit queue of runs. A run is the longest *
* consecutive subsequence, in which all elements are ascending or strictly *
* descending. Every descending run is reversed to ascending run. We cannot *
* consider non-strictly descending runs, since that would sacrifice the stabi- *
* lity of the algorithm. After the run queue is establish, the algorithm re- *
* moves two runs from the head of the queue, merges them into one run, which *
* is then appended to the tail of the run queue. Merging continues until the *
* queue contains only one run, which denotes that the entire input range is *
* sorted. *
* *
* The best-case complexity is O(N), the average and worst-case complexity is *
* O(N log N). Space complexity is O(N). *
*******************************************************************************/
template<class RandomIt, class Cmp>
void natural_merge_sort(RandomIt first, RandomIt last, Cmp cmp)
{
 const size_t length = std::distance(first, last);
 if (length < 2)
 {
 // Trivially sorted.
 return;
 }
 typedef typename std::iterator_traits<RandomIt>::value_type value_type;
 // Scan the runs.
 std::unique_ptr<UnsafeIntQueue> p_queue = build_run_size_queue(first, last, cmp);
 // Request a buffer.
 RandomIt buffer = new value_type[length];
 // Count the amount of merge passes over the array required to bring order.
 const size_t merge_passes = get_pass_amount(p_queue->size());
 RandomIt source;
 RandomIt target;
 // Make sure that after the last merge pass, all data ends up in the input
 // container.
 if ((merge_passes & 1) == 1)
 {
 source = buffer;
 target = first;
 std::copy(first, last, buffer);
 }
 else
 {
 source = first;
 target = buffer;
 }
 size_t runs_left = p_queue->size();
 size_t offset = 0;
 // While there is runs to merge, do...
 while (p_queue->size() > 1)
 {
 // Remove two runs from the head of the run queue.
 size_t left_run_length = p_queue->dequeue();
 size_t right_run_length = p_queue->dequeue();
 std::merge(source + offset,
 source + offset + left_run_length,
 source + offset + left_run_length,
 source + offset + left_run_length + right_run_length,
 target + offset,
 cmp);
 // Append the merged run to the tail of the queue.
 p_queue->enqueue(left_run_length + right_run_length);
 runs_left -= 2;
 offset += left_run_length + right_run_length;
 // The current pass over the array is almost complete.
 switch (runs_left)
 {
 case 1:
 {
 const size_t single_length = p_queue->dequeue();
 std::copy(source + offset,
 source + offset + single_length,
 target + offset);
 p_queue->enqueue(single_length);
 }
 // FALL THROUGH!
 case 0:
 {
 runs_left = p_queue->size();
 offset = 0;
 RandomIt tmp = source;
 source = target;
 target = tmp;
 break;
 }
 }
 }
 delete[] buffer;
}
#endif

main.cpp:

#include <chrono>
#include <functional>
#include <iostream>
#include <random>
#include "natural_merge_sort.h"
/*******************************************************************************
* Creates a random integer array of length 'length', minimum integer *
* 'minimum', maximum integer 'maximum', using seed 'seed'. *
*******************************************************************************/
static int* get_random_int_array(const size_t length,
 const int minimum,
 const int maximum,
 const unsigned int seed)
{
 std::default_random_engine generator(seed);
 std::uniform_int_distribution<int> distribution(minimum, maximum);
 int* array = new int[length];
 for (size_t i = 0; i < length; ++i)
 {
 array[i] = distribution(generator);
 }
 return array;
}
/*******************************************************************************
* Create an array of pointers to integers. *
*******************************************************************************/
static int** get_random_int_pointer_array(const size_t length,
 const int minimum,
 const int maximum,
 const unsigned seed)
{
 std::default_random_engine generator(seed);
 std::uniform_int_distribution<int> distribution(minimum, maximum);
 int** array = new int*[length];
 for (size_t i = 0; i < length; ++i)
 {
 array[i] = new int(distribution(generator));
 }
 return array;
}
/*******************************************************************************
* Returns a strongly presorted array of integers. *
*******************************************************************************/
static int* get_presorted_int_array(const size_t length)
{
 int* array = new int[length];
 int num = 0;
 for (size_t i = 0; i < length / 2; ++i)
 {
 array[i] = num++;
 }
 for (size_t i = length / 2; i < length; ++i)
 {
 array[i] = num--;
 }
 return array;
}
/*******************************************************************************
* Returns the milliseconds since the Unix epoch. *
*******************************************************************************/
static unsigned long long get_milliseconds()
{
 return std::chrono::duration_cast<std::chrono::milliseconds>(
 std::chrono::system_clock::now().time_since_epoch()).count();
}
/*******************************************************************************
* Profiles the 'std::stable_sort' agains the range ['begin', 'end') using the *
* comparator 'cmp'. *
*******************************************************************************/
template<class T, class Cmp>
static void profile_stable_sort(T begin, T end, Cmp cmp)
{
 unsigned long long ta = get_milliseconds();
 std::stable_sort(begin, end, cmp);
 unsigned long long tb = get_milliseconds();
 std::cout << "std::stable_sort in "
 << (tb - ta)
 << " milliseconds. "
 << "Sorted: "
 << std::is_sorted(begin, end, cmp)
 << std::endl;
}
/*******************************************************************************
* Profiles the 'natural_merge_sort' agains the range ['begin', 'end') using *
* the comparator 'cmp'. *
*******************************************************************************/
template<class T, class Cmp>
void profile_natural_merge_sort(T begin, T end, Cmp cmp)
{
 unsigned long long ta = get_milliseconds();
 natural_merge_sort(begin, end, cmp);
 unsigned long long tb = get_milliseconds();
 std::cout << "natural_merge_sort in "
 << (tb - ta)
 << " milliseconds. "
 << "Sorted: "
 << std::is_sorted(begin, end, cmp)
 << std::endl;
}
/*******************************************************************************
* Profiles the sorting algorithms on a random integer array. *
*******************************************************************************/
static void profile_on_random_array(const size_t sz,
 const int minimum,
 const int maximum,
 const unsigned seed)
{
 int* array1 = get_random_int_array(sz, minimum, maximum, seed);
 int* array2 = new int[sz];
 std::copy(array1, array1 + sz, array2);
 std::cout << "--- PROFILING ON RANDOM ARRAY OF LENGTH "
 << sz
 << " ---"
 << std::endl;
 profile_stable_sort(array1, 
 array1 + sz, 
 std::less<>());
 profile_natural_merge_sort(array2, 
 array2 + sz, 
 std::less<>());
 std::cout << "Same contents: "
 << std::equal(array1, array1 + sz, array2, array2 + sz)
 << std::endl
 << std::endl;
}
/*******************************************************************************
* Profiles the sorting algorithms on an array of pointers to random integers. *
*******************************************************************************/
static void profile_on_integer_pointer_array(const size_t sz,
 const int minimum,
 const int maximum,
 const unsigned seed)
{
 std::cout << "--- PROFILING ON RANDOM POINTER ARRAY OF LENGTH "
 << sz
 << " ---"
 << std::endl;
 int** array1 = get_random_int_pointer_array(sz,
 minimum,
 maximum,
 seed);
 int** array2 = new int*[sz];
 std::copy(array1, array1 + sz, array2);
 auto lambda = [](int* a, int* b){
 return *a < *b;
 };
 profile_stable_sort(array1, 
 array1 + sz, 
 lambda);
 profile_natural_merge_sort(array2, 
 array2 + sz, 
 lambda);
 std::cout << "Same contents: "
 << std::equal(array1, array1 + sz, array2, array2 + sz)
 << std::endl
 << std::endl;
}
/*******************************************************************************
* Profiles the sorting algorithms on a presorted array. *
*******************************************************************************/
static void profile_on_presorted_array(const size_t sz)
{
 std::cout << "--- PROFILING ON PRESORTED ARRAY OF LENGTH "
 << sz
 << " ---"
 << std::endl;
 int* array1 = get_presorted_int_array(sz);
 int* array2 = new int[sz];
 std::copy(array1, array1 + sz, array2);
 profile_stable_sort(array1, 
 array1 + sz, 
 std::less<>());
 profile_natural_merge_sort(array2, 
 array2 + sz, 
 std::less<>());
 std::cout << "Same contents: "
 << std::equal(array1, array1 + sz, array2, array2 + sz)
 << std::endl
 << std::endl;
}
/*******************************************************************************
* The entry point to a demo program. *
*******************************************************************************/
int main(int argc, const char * argv[]) {
 unsigned long long seed = get_milliseconds();
 std::cout << "Seed: "
 << seed
 << std::endl
 << std::endl;
 const size_t length = 5000000;
 const int min_int = -10000;
 const int max_int = 30000;
 std::cout << std::boolalpha;
 profile_on_random_array(length, min_int, max_int, seed);
 profile_on_integer_pointer_array(length, min_int, max_int, seed);
 profile_on_presorted_array(length);
 return 0;
}

I have done the following:

  • is_sorted changed to std::is_sorted (no need to reinvent the wheel)
  • are_equal changed to std::equal
  • compare_int changed to std::less<int>
  • compare_dereference changed to a lambda
  • merge changed to std::merge
lang-cpp

AltStyle によって変換されたページ (->オリジナル) /