-
Notifications
You must be signed in to change notification settings - Fork 20.2k
Document hash() limitations in BucketSort; add tests showing distribution behavior #6511
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1af30ca
e21a21f
d8a64f4
a38e538
9402fc1
a35f326
d031414
635fcdb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any reason why you didn't put your tests in the existing BucketSortTest? @Leogricci
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the issue is only related to the particular case of the hash function, I thought it might be better to create another test class, but I realize now that it just makes the tests harder to find. Sorry for the inconvenience; I will modify it now.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
package com.thealgorithms.sorts; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertArrayEquals; | ||
|
||
import java.util.Arrays; | ||
import org.junit.jupiter.api.Test; | ||
|
||
public class BucketSortHashBehaviorTest { | ||
|
||
private static <T extends Comparable<T>> int pseudoHash(final T element, final T min, final T max, final int numberOfBuckets) { | ||
// Reproduces the production hash() logic | ||
double range = max.compareTo(min); | ||
double normalizedValue = element.compareTo(min) / range; // -1/0/1 divided by -1/0/1 | ||
return (int) (normalizedValue * (numberOfBuckets - 1)); | ||
} | ||
|
||
@Test // Test case when all numbers are equal | ||
void sortStillCorrectWhenAllEqual() { | ||
Integer[] arr = {1, 1, 1, 1, 1}; | ||
Integer[] expected = arr.clone(); | ||
|
||
new BucketSort().sort(arr); | ||
assertArrayEquals(expected, arr); | ||
|
||
// Observe bucket mapping (all collapse to index 0) | ||
Integer min = 1; | ||
Integer max = 1; | ||
int numberOfBuckets = Math.max(arr.length / 10, 1); // same as BUCKET_DIVISOR rule | ||
int idx = pseudoHash(1, min, max, numberOfBuckets); | ||
// idx will be 0 because NaN cast to int -> 0 in Java | ||
System.out.println("All-equal case -> bucket index: " + idx); | ||
} | ||
|
||
@Test // Test case with non-equal integers | ||
void sortStillCorrectNonEqualIntegers() { | ||
Integer[] arr = {20, 40, 30, 10}; | ||
Integer[] expected = {10, 20, 30, 40}; | ||
|
||
new BucketSort().sort(arr); | ||
assertArrayEquals(expected, arr); | ||
|
||
Integer min = Arrays.stream(arr).min(Integer::compareTo).get(); | ||
Integer max = Arrays.stream(arr).max(Integer::compareTo).get(); | ||
int numberOfBuckets = Math.max(arr.length / 10, 1); // often 1 here; bump to 4 to demonstrate | ||
numberOfBuckets = 4; | ||
|
||
for (Integer x : arr) { | ||
int idx = pseudoHash(x, min, max, numberOfBuckets); | ||
System.out.println("Value " + x + " -> bucket " + idx); | ||
} | ||
// Expect only two distinct buckets because compareTo gives -1/0/1 | ||
} | ||
|
||
@Test // Test case when the Array contains Strings | ||
void sortStillCorrectWhenStrings() { | ||
String[] arr = {"apple", "banana", "carrot"}; | ||
String[] expected = arr.clone(); | ||
|
||
new BucketSort().sort(arr); | ||
assertArrayEquals(expected, arr); | ||
|
||
String min = Arrays.stream(arr).min(String::compareTo).get(); | ||
String max = Arrays.stream(arr).max(String::compareTo).get(); | ||
int numberOfBuckets = 4; | ||
|
||
for (String s : arr) { | ||
int idx = pseudoHash(s, min, max, numberOfBuckets); | ||
System.out.println("Value \"" + s + "\" -> bucket " + idx); | ||
} | ||
// Buckets reflect only lexicographic order, not a numeric spacing | ||
} | ||
} |