This post is about computing order statistics: given an array \$A\$ (not necessarily sorted), find the \$k\$th smallest array component. The entire repository is here.
com.github.coderodde.algo.selection.Selector.java:
package com.github.coderodde.algo.selection;
/**
* This interface defines the API for the selection algorithms.
*
* @author Rodion "rodde" Efremov
* @param <E> the array component type.
* @version 1.6 (Sep 25, 2022)
* @since 1.6 (Sep 25, 2022)
*/
public sealed interface Selector<E extends Comparable<? super E>>
permits LinearTimeSelector, SortingSelector, RandomizedSelector {
E select(E[] array, int k);
E select(E[] array, int k, int fromIndex, int toIndex);
}
com.github.coderodde.algo.selection.LinearTimeSelector.java:
package com.github.coderodde.algo.selection;
import static com.github.coderodde.algo.selection.Support.checkArray;
import static com.github.coderodde.algo.selection.Support.checkK;
import static com.github.coderodde.algo.selection.Support.checkRangeIndices;
import java.util.Arrays;
/**
* This class provides a method for {@code k}th order statistics in worst case
* linear time.
*
* @author Rodion "rodde" Efremov
* @param <E> the element type.
* @version 1.6 (Feb 18, 2022)
* @since 1.6 (Feb 18, 2022)
*/
public final class LinearTimeSelector<E extends Comparable<? super E>>
implements Selector<E> {
private static final int GROUP_LENGTH = 5;
/**
* {@inheritDoc }
*/
@Override
public E select(E[] array, int k, int fromIndex, int toIndex) {
checkArray(array);
checkK(array.length, k);
checkRangeIndices(fromIndex, toIndex);
return selectImpl(array, k, fromIndex, toIndex);
}
/**
* {@inheritDoc }
*/
@Override
public E select(E[] array, int k) {
checkArray(array);
return select(array, k, 0, array.length);
}
private static <E extends Comparable<? super E>> E
selectImpl(E[] array, int index, int fromIndex, int toIndex) {
E[] medians = getGroupMedians(array,
fromIndex,
toIndex);
E pivot;
if (medians.length <= 5) {
pivot = medians[medians.length / 2];
} else {
pivot = selectImpl(medians, medians.length / 2, 0, medians.length);
}
int leftArrayLength = 0;
int rightArrayLength = 0;
for (int i = fromIndex; i < toIndex; i++) {
E datum = array[i];
if (datum.compareTo(pivot) < 0) {
leftArrayLength++;
} else if (datum.compareTo(pivot) > 0) {
rightArrayLength++;
}
}
E[] leftArray = Arrays.copyOf(array, leftArrayLength);
E[] rightArray = Arrays.copyOf(array, rightArrayLength);
for (int i = fromIndex, outputIndex = 0; i < toIndex; i++) {
E datum = array[i];
if (datum.compareTo(pivot) < 0) {
leftArray[outputIndex++] = datum;
}
}
for (int i = fromIndex, outputIndex = 0; i < toIndex; i++) {
E datum = array[i];
if (datum.compareTo(pivot) > 0) {
rightArray[outputIndex++] = datum;
}
}
int k = leftArrayLength;
if (index < k) {
return selectImpl(leftArray, index, 0, leftArrayLength);
} else if (index > k) {
return selectImpl(rightArray, index - k - 1, 0, rightArrayLength);
} else {
return pivot;
}
}
private static <E extends Comparable<? super E>>
E[] getGroupMedians(E[] array, int fromIndex, int toIndex) {
int arrayLength = toIndex - fromIndex;
int numberOfGroups = arrayLength / 5 +
(arrayLength % 5 != 0 ? 1 : 0);
E[] outputArray = Arrays.copyOfRange(array, 0, numberOfGroups);
int groupStartIndex = fromIndex;
int outputArrayIndex = 0;
for (int groupIndex = 0; groupIndex < numberOfGroups; groupIndex++) {
int groupEndIndex =
Math.min(groupStartIndex + GROUP_LENGTH, toIndex);
Arrays.sort(array, groupStartIndex, groupEndIndex);
int groupLength = groupEndIndex - groupStartIndex;
int medianIndex = (groupLength - 1) / 2;
outputArray[outputArrayIndex++] = array[groupStartIndex +
medianIndex];
groupStartIndex = groupEndIndex;
}
return outputArray;
}
}
com.github.coderodde.algo.selection.RandomizedSelector.java:
package com.github.coderodde.algo.selection;
import static com.github.coderodde.algo.selection.Support.checkArray;
import static com.github.coderodde.algo.selection.Support.checkK;
import static com.github.coderodde.algo.selection.Support.checkRangeIndices;
import java.util.Random;
/**
* This class implements a randomized selector relying on the ideas of
* quicksort.
*
* @param <E> the array component type. Must be {@link Comparable}.
* @author Rodion "rodde" Efremov
* @version 1.6 (Sep 26, 2022)
* @since 1.6 (Sep 26, 2022)
*/
public final class RandomizedSelector<E extends Comparable<? super E>> implements Selector<E> {
@Override
public E select(E[] array, int k) {
checkArray(array);
return select(array, k, 0, array.length);
}
@Override
public E select(E[] array, int index, int fromIndex, int toIndex) {
checkArray(array);
checkRangeIndices(fromIndex, toIndex);
checkK(toIndex - fromIndex, index);
Random random = new Random();
return selectImpl(array, index, fromIndex, toIndex, random);
}
private E selectImpl(E[] array,
int index,
int fromIndex,
int toIndex,
Random random) {
int rangeLength = toIndex - fromIndex;
if (rangeLength < 1) {
throw new IllegalArgumentException(
"The requested range is non-existent, length = "
+ rangeLength);
}
if (rangeLength == 1) {
return array[fromIndex];
}
int q = randomizedPartition(array, fromIndex, toIndex, random);
int k = q - fromIndex;
if (index == k) {
return array[q];
} else if (index < k) {
return selectImpl(array, index, fromIndex, q, random);
} else {
return selectImpl(array, index - k, q, toIndex, random);
}
}
private static <E extends Comparable<? super E>>
int randomizedPartition(E[] array,
int fromIndex,
int toIndex,
Random random) {
int i = fromIndex + random.nextInt(toIndex - fromIndex);
swap(array, i, toIndex - 1);
return partition(array, fromIndex, toIndex);
}
static <E extends Comparable<? super E>>
int partition(E[] array, int fromIndex, int toIndex) {
E x = array[toIndex - 1];
int i = fromIndex - 1;
for (int j = fromIndex; j < toIndex - 1; j++) {
if (array[j].compareTo(x) <= 0) {
swap(array, ++i, j);
}
}
swap(array, i + 1, toIndex - 1);
return i + 1;
}
private static <E> void swap(E[] array, int index1, int index2) {
E tmp = array[index1];
array[index1] = array[index2];
array[index2] = tmp;
}
}
com.github.coderodde.algo.selection.SortingSelector.java:
package com.github.coderodde.algo.selection;
import static com.github.coderodde.algo.selection.Support.checkArray;
import static com.github.coderodde.algo.selection.Support.checkK;
import static com.github.coderodde.algo.selection.Support.checkRangeIndices;
import java.util.Arrays;
/**
*
* @author Rodion "rodde" Efremov
* @param <E> the array component type.
* @version 1.6 (Sep 25, 2022)
* @since 1.6 (Sep 25, 2022)
*/
public final class SortingSelector<E extends Comparable<? super E>>
implements Selector<E> {
@Override
public E select(E[] array, int k, int fromIndex, int toIndex) {
checkArray(array);
checkRangeIndices(fromIndex, toIndex);
checkK(toIndex - fromIndex, k);
Arrays.sort(array, fromIndex, toIndex);
return array[k + fromIndex];
}
@Override
public E select(E[] array, int k) {
checkArray(array);
return select(array, k, 0, array.length);
}
}
com.github.coderodde.algo.selection.Support.java:
package com.github.coderodde.algo.selection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
/**
* This class provides common methods for dealing with the order statistics
* algorithms.
*
* @author Rodion "rodde" Efremov
* @version 1.6 (Feb 14, 2022; Love Edition)
* @since 1.6 (Feb 14, 2022; Love Edition)
*/
public final class Support {
public static <E>
void checkArrayContainsNoDuplicatesElements(E[] array,
int fromIndex,
int toIndex) {
Set<E> filter = new HashSet<>();
for (E element : array) {
if (filter.contains(element)) {
throw new IllegalArgumentException(
"The element [" + element + "] is duplicated.");
}
filter.add(element);
}
}
public static <E> void checkArrayContainsNoDuplicatesElements(E[] array) {
checkArrayContainsNoDuplicatesElements(array, 0, array.length);
}
public static Integer[] getArray(Random random,
int length,
int minimumValue,
int maximumValue) {
Set<Integer> filter = new HashSet<>();
while (filter.size() < length) {
int datum = minimumValue
+ random.nextInt(maximumValue - minimumValue + 1);
filter.add(datum);
}
List<Integer> ints = new ArrayList<>(filter);
Collections.<Integer>shuffle(ints, random);
return ints.toArray(new Integer[length]);
}
static <E> void swap(E[] array, int index1, int index2) {
E tmp = array[index1];
array[index1] = array[index2];
array[index2] = tmp;
}
static <E> void checkArray(E[] array) {
Objects.requireNonNull(array, "The input array is null.");
if (array.length == 0) {
throw new IllegalArgumentException("The input array is empty.");
}
}
static void checkK(int rangeLength, int k) {
if (k < 0) {
throw new IllegalArgumentException("'k' is negative: " + k);
}
if (k >= rangeLength) {
throw new IllegalArgumentException(
"'k' is too large: " + k +
"Must be at most " + (rangeLength - 1) + ".");
}
}
static void checkRangeIndices(int fromIndex, int toIndex) {
if (fromIndex < 0) {
throw new IllegalArgumentException(
"frontIndex is negative: "
+ fromIndex
+ ". Must be at least 0.");
}
if (toIndex < 0) {
throw new IllegalArgumentException(
"toIndex is negative: "
+ toIndex
+ ". Must be at least 0.");
}
if (fromIndex > toIndex) {
throw new IllegalArgumentException(
"fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
}
}
}
com.github.coderodde.algo.selection.AbstractSelectorTest.java:
package com.github.coderodde.algo.selection;
import static com.github.coderodde.algo.selection.Support.getArray;
import java.util.Random;
import static org.junit.Assert.assertEquals;
import org.junit.Before;
import org.junit.Test;
public abstract class AbstractSelectorTest {
private static final int MAXIMUM_ARRAY_CAPACITY = 10_011;
private static final int MINIMUM_ARRAY_CAPACITY = 5_317;
private static final int MINIMUM_INT = -100_000;
private static final int MAXIMUM_INT = 100_000;
private Integer[] array ;
private final Selector<Integer> selector;
public AbstractSelectorTest(Selector<Integer> selector) {
this.selector = selector;
}
@Before
public void methodInit() {
this.array = new Integer[]{ 4, 1, 5, 2, 3 };
}
@Test
public void testSelect1() {
assertEquals(Integer.valueOf(1), selector.select(array, 0));
}
@Test
public void testSelect2() {
assertEquals(Integer.valueOf(2), selector.select(array, 1));
}
@Test
public void testSelect3() {
assertEquals(Integer.valueOf(3), selector.select(array, 2));
}
@Test
public void testSelect4() {
assertEquals(Integer.valueOf(4), selector.select(array, 3));
}
@Test
public void testSelect5() {
assertEquals(Integer.valueOf(5), selector.select(array, 4));
}
@Test
public void testSelect13() {
Integer[] array = new Integer[13];
for (int i = 0; i < array.length; i++) {
array[i] = i;
}
for (int i = 0; i < array.length; i++) {
assertEquals((Integer) i, selector.select(array, i));
}
}
@Test
public void bruteForceTest() {
Random random = new Random(1L);
int length =
MINIMUM_ARRAY_CAPACITY
+ random.nextInt(
MAXIMUM_ARRAY_CAPACITY - MINIMUM_ARRAY_CAPACITY + 1);
int index1 = random.nextInt(length);
int index2 = random.nextInt(length);
int fromIndex = Math.min(index1, index2);
int toIndex = Math.max(index1, index2);
Integer[] array1 = getArray(random,
length,
MINIMUM_INT,
MAXIMUM_INT);
Support.checkArrayContainsNoDuplicatesElements(array1);
Integer[] array2 = array1.clone();
Selector<Integer> sortingSelector = new SortingSelector<>();
Selector<Integer> linearTimeSelector = new LinearTimeSelector();
for (int i = fromIndex, k = 0; i < toIndex; i++, k++) {
Integer int1 =
sortingSelector.select(
array1,
k,
fromIndex,
toIndex);
Integer int2 =
linearTimeSelector.select(
array2,
k,
fromIndex,
toIndex);
assertEquals(int1, int2);
}
}
}
com.github.coderodde.algo.selection.LinearTimeSelectorTest.java:
package com.github.coderodde.algo.selection;
public class LinearTimeSelectorTest extends AbstractSelectorTest {
public LinearTimeSelectorTest() {
super(new LinearTimeSelector<>());
}
}
com.github.coderodde.algo.selection.RandomizedSelectorTest.java:
package com.github.coderodde.algo.selection;
import java.util.Arrays;
import org.junit.Test;
import static org.junit.Assert.*;
public class RandomizedSelectorTest extends AbstractSelectorTest {
public RandomizedSelectorTest() {
super(new RandomizedSelector<>());
}
@Test
public void partition() {
Integer[] array = { 2, 8, 7, 1, 3, 5, 6, 4 };
RandomizedSelector.partition(array, 0, array.length);
assertTrue(Arrays.equals(array, new Integer[] {
2, 1, 3, 4, 7, 5, 6, 8
}));
}
}
com.github.coderodde.algo.selection.SortingSelectorTest.java:
package com.github.coderodde.algo.selection;
public class SortingSelectorTest extends AbstractSelectorTest {
public SortingSelectorTest() {
super(new SortingSelector<>());
}
}
com.github.coderodde.algo.selection.SupportTest.java:
package com.github.coderodde.algo.selection;
import org.junit.Test;
public class SupportTest {
@Test(expected = IllegalArgumentException.class)
public void throwOnDuplicateElementInArray() {
Integer[] array = { 2, 3, 4, 5, 1, -1, 3 };
Support.checkArrayContainsNoDuplicatesElements(array);
}
@Test
public void passesOnNoDuplicateElementInArray() {
Integer[] array = { 2, 3, 4, 5, 1, -1 };
Support.checkArrayContainsNoDuplicatesElements(array);
}
}
com.github.coderodde.algo.selection.benchmark.SelectorBenchmark.java:
package com.github.coderodde.algo.selection.benchmark;
import com.github.coderodde.algo.selection.LinearTimeSelector;
import com.github.coderodde.algo.selection.RandomizedSelector;
import com.github.coderodde.algo.selection.Selector;
import com.github.coderodde.algo.selection.SortingSelector;
import com.github.coderodde.algo.selection.Support;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
/**
* This class implements the selector algorithm benchmark.
*
* @author Rodion "rodde" Efremov
* @version 1.6 (Sep 26, 2022)
* @since 1.6 (Sep 26, 2022)
*/
public final class SelectorBenchmark {
private static final int ARRAY_LENGTH = 10_000;
private static final int FROM_INDEX = 1000;
private static final int TO_INDEX = ARRAY_LENGTH - 1000;
private static final int MINIMUM_VALUE = -100_000;
private static final int MAXIMUM_VALUE = 100_000;
public static void main(String[] args) {
long seed = System.currentTimeMillis();
Random random = new Random(seed);
System.out.println("seed = " + seed);
Integer[] array1 =
Support.getArray(
random,
ARRAY_LENGTH,
MINIMUM_VALUE,
MAXIMUM_VALUE);
Integer[] array2 = array1.clone();
Integer[] array3 = array1.clone();
Selector<Integer> selector1 = new SortingSelector<>();
Selector<Integer> selector2 = new LinearTimeSelector<>();
Selector<Integer> selector3 = new RandomizedSelector<>();
List<Integer> results1 = warmup(selector1,
array1,
FROM_INDEX,
TO_INDEX);
List<Integer> results2 = warmup(selector2,
array2,
FROM_INDEX,
TO_INDEX);
List<Integer> results3 = warmup(selector3,
array3,
FROM_INDEX,
TO_INDEX);
if (results1.equals(results2) && results2.equals(results3)) {
System.out.println("Algorithms agreed on warmup.");
}
results1 = benchmark(selector1, array1, FROM_INDEX, TO_INDEX);
results2 = benchmark(selector2, array2, FROM_INDEX, TO_INDEX);
results3 = benchmark(selector3, array3, FROM_INDEX, TO_INDEX);
if (results1.equals(results2) && results2.equals(results3)) {
System.out.println("Algorithms agreed on benchmark.");
}
}
private static List<Integer> warmup(Selector<Integer> selector,
Integer[] array,
int fromIndex,
int toIndex) {
return run(selector, array, fromIndex, toIndex, false);
}
private static List<Integer> benchmark(Selector<Integer> selector,
Integer[] array,
int fromIndex,
int toIndex) {
return run(selector, array, fromIndex, toIndex, true);
}
private static List<Integer> run(Selector<Integer> selector,
Integer[] array,
int fromIndex,
int toIndex,
boolean print) {
List<Integer> results = new ArrayList<>(toIndex - fromIndex);
long duration = 0L;
for (int k = 0; k < toIndex - fromIndex; k++) {
shuffle(array, fromIndex, toIndex);
long startTime = System.currentTimeMillis();
results.add(selector.select(array, k, fromIndex, toIndex));
long endTime = System.currentTimeMillis();
duration += endTime - startTime;
}
if (print) {
System.out.println(
selector.getClass().getSimpleName()
+ " in "
+ duration
+ " milliseconds.");
}
return results;
}
private static <E> void shuffle(E[] array, int fromIndex, int toIndex) {
Random random = new Random();
List<E> list = new ArrayList<>(toIndex - fromIndex);
for (int i = fromIndex; i < toIndex; i++) {
list.add(array[i]);
}
Collections.shuffle(list, random);
for (int i = fromIndex; i < toIndex; i++) {
array[i] = list.get(i - fromIndex);
}
}
}
The typical output on my PC is:
seed = 1664257975649
Algorithms agreed on warmup.
SortingSelector in 9708 milliseconds.
LinearTimeSelector in 6910 milliseconds.
RandomizedSelector in 1432 milliseconds.
Algorithms agreed on benchmark.
Critique request
Is there any way to improve the code?
1 Answer 1
(I'm no fan of final for interfaces and classes, this seems to carry to sealed.)
one alternative to specifying
<E extends Comparable<? super E>>was to accept aComparator
- I guess that would have been beside the point of comparing algorithms, and complicating implementation.defines the API for the selection algorithmsis a laudable promise - I don't seecom.github.coderodde.algo.selection.Selectorlive up to it:
• Will the contents ofarraystay unchanged, permuted, some values overwritten with others, or worse?
• Doesk-smallest refer to [fromIndex,toIndex) or to all ofarray, with the implication all elements in [0,fromIndex) are smaller?I see three implementations of
select(E[] array, int k)in terms ofselect(array, k, fromIndex, toIndex)- and three inconsistent instances of parameter checking:
Selectorcould have provided adefaultimplementation.I guess this is one of the occasions where I'd prefer an abstract class for being able to provide final methods:
public abstract class Selector<E extends Comparable<? super E>> { /** @param array likely to be permuted @return a smallest element of <code>array</code> from <code>fromIndex</code> to <code>toIndex</code> (exclusive) such that <code>k</code> elements in range are no greater than it. */ protected abstract E permuting(E[] array, int k, int fromIndex, int toIndex); /** @param array may be permuted @return a smallest element of <code>array</code> from <code>fromIndex</code> to <code>toIndex</code> (exclusive) such that <code>k</code> elements in range are no greater than it. */ public final E select_permuting(E[] array, int k, int fromIndex, int toIndex) { checkArray(array); checkRangeIndices(fromIndex, toIndex); if (0 == k) return Collections.min(Arrays.asList(array).subList(fromIndex, toIndex)); if (k == toIndex - 1) return Collections.max(Arrays.asList(array).subList(fromIndex, toIndex)); checkK(toIndex - fromIndex, k); return permuting(array, k, fromIndex, toIndex); } /** @param array will not be modified @return a smallest element of <code>array</code> from <code>fromIndex</code> to <code>toIndex</code> (exclusive) such that <code>k</code> elements in range are no greater than it. */ public final E select(E[] array, int k, int fromIndex, int toIndex) { checkArray(array); E[] range = Arrays.copyOfRange(array, fromIndex, toIndex); return select_permuting(range, k, 0, range.length); } /** @param array will not be modified @return an element of <code>array</code> such that <code>k</code> elements are smaller than or equal to it. */ public final E select(E[] array, int k) { checkArray(array); return select_permuting(array.clone(), k, 0, array.length); } }(When successfully defining a suitable
/** @param array will be permuted such that <code>k</code> elements in range no greater than the <code>k</code>th are at the beginning of the range @return ??? */ protected abstract ??? partition(E[] array, int k, int fromIndex, int toIndex);, I'd add a final implementation of
select_permuting(array, k, fromIndex, toIndex)using that.)LinearTimeSelector- irritatingly introduces another interpretation of
kinselectImpl()
(without documentingindex). - creates a lot of arrays.
OnceleftArrayLengthandrightArrayLengthare known, copy just the appropriate number of elements to an array created at the first partition and passed down from there if not wanting to modifyarray.
- irritatingly introduces another interpretation of
I always found pivot selection in median-of-five-quickselect somewhat aloft:
It should be possible to establish bounds for the other "next level medians", not just the middle one in median-of-five.
Ifkwas smaller(greater) than the lower(upper) limit for the 1st or 2nd(last or 4th) median, pick that as the pivot.
(Hardly much use: for sizeable samples, medians can be expected to be close. Small multisets better be handled differently altogether.)
-
\$\begingroup\$ 20 to 60 for 2nd, 40 to 80 for 4th median of five? 10 to 50 for 1st, 50 to 90 for last... \$\endgroup\$greybeard– greybeard2022年09月28日 07:56:40 +00:00Commented Sep 28, 2022 at 7:56
-
\$\begingroup\$ (I don't need to suggest another pivot selection for O(n) quickselect, PICK2 from the almost 50 year old paper looks a lot like my idea.) \$\endgroup\$greybeard– greybeard2022年09月29日 07:18:18 +00:00Commented Sep 29, 2022 at 7:18
You must log in to answer this question.
Explore related questions
See similar questions with these tags.
Selection.checkArrayContainsNoDuplicatesElements()ignores the indices passed. \$\endgroup\$