1
\$\begingroup\$

(See the previous version.)

Now I have this:

com.github.coderodde.util.IntHashSet:

package com.github.coderodde.util;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
/**
 * This class implements a simple hash set for non-negative {@code int} values.
 * It is used in the {@link com.github.coderodde.util.LinkedList} in order to 
 * keep track of nodes that are being pointed to by fingers.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.6 (Aug 29, 2021)
 * @since 1.6 (Aug 29, 2021)
 */
public class IntHashSet {
 private static final int INITIAL_CAPACITY = 8;
 private static final class Node {
 Node next;
 int integer;
 Node(int integer, Node next) {
 this.integer = integer;
 this.next = next;
 }
 @Override
 public String toString() {
 return "Chain node, integer = " + integer;
 }
 }
 private Node[] table = new Node[INITIAL_CAPACITY];
 private int size = 0;
 private int mask = INITIAL_CAPACITY - 1;
 
 @Override
 public String toString() {
 return "size = " + size;
 }
 public void add(int integer) {
 int targetCollisionChainIndex = integer & mask;
 Node node = table[targetCollisionChainIndex];
 
 while (node != null) {
 if (node.integer == integer) {
 return;
 }
 
 node = node.next;
 }
 
 size++;
 
 if (size > table.length) {
 Node[] newTable = new Node[2 * table.length];
 mask = newTable.length - 1;
 
 for (Node currentNode : table) {
 while (currentNode != null) {
 Node nextNode = currentNode.next;
 
 int newTableHash = currentNode.integer & mask;
 currentNode.next = newTable[newTableHash];
 newTable[newTableHash] = currentNode;
 
 currentNode = nextNode;
 }
 }
 
 table = newTable;
 targetCollisionChainIndex = integer & mask;
 }
 
 Node newNode = new Node(integer, table[targetCollisionChainIndex]);
 table[targetCollisionChainIndex] = newNode;
 }
 public boolean contains(int integer) {
 final int collisionChainIndex = integer & mask;
 Node node = table[collisionChainIndex];
 while (node != null) {
 if (node.integer == integer) {
 return true;
 }
 node = node.next;
 }
 return false;
 }
 public void remove(int integer) {
 int targetCollisionChainIndex = integer & mask;
 Node node = table[targetCollisionChainIndex];
 Node prev = null;
 
 while (node != null) {
 if (node.integer == integer) {
 break;
 }
 
 prev = node;
 node = node.next;
 }
 
 if (node == null) 
 return;
 size--;
 
 if (size * 4 <= table.length && table.length >= INITIAL_CAPACITY * 4) {
 Node[] newTable = new Node[table.length / 4];
 mask = newTable.length - 1;
 
 for (Node currentNode : table) {
 while (currentNode != null) {
 if (currentNode == node) {
 // Omit the node with the target integer:
 currentNode = currentNode.next;
 continue;
 }
 
 Node nextNode = currentNode.next;
 
 int newTableHash = currentNode.integer & mask;
 currentNode.next = newTable[newTableHash];
 newTable[newTableHash] = currentNode;
 
 currentNode = nextNode;
 }
 }
 
 table = newTable;
 } else if (prev == null) {
 table[targetCollisionChainIndex] = 
 table[targetCollisionChainIndex].next;
 } else {
 prev.next = prev.next.next;
 }
 }
 public void clear() {
 size = 0;
 table = new Node[INITIAL_CAPACITY];
 mask = table.length - 1;
 }
 
 private static final int DATA_LENGTH = 5_000_000;
 
 
 public static void main(String[] args) {
 Random random = new Random(10L);
 
 int[] addData = getAddData(random);
 int[] containsData = getContainsData(random);
 int[] removeData = getRemoveData(random);
 
 for (int iter = 0; iter < 5; iter++) {
 System.out.println(">>> Iteration: " + (iter + 1) + "/5");
 
 IntHashSet myset = new IntHashSet();
 Set<Integer> set = new HashSet<>();
 long start = System.currentTimeMillis();
 for (int i : addData) {
 myset.add(i);
 }
 long end = System.currentTimeMillis();
 System.out.println(" IntHashSet.add in " + (end - start));
 start = System.currentTimeMillis();
 for (int i : addData) {
 set.add(i);
 }
 end = System.currentTimeMillis();
 System.out.println(" HashSet.add in " + (end - start) + "\n");
 start = System.currentTimeMillis();
 for (int i : containsData) {
 myset.contains(i);
 }
 end = System.currentTimeMillis();
 System.out.println(" IntHashSet.contains in " + (end - start));
 start = System.currentTimeMillis();
 for (int i : containsData) {
 set.contains(i);
 }
 end = System.currentTimeMillis();
 System.out.println(" HashSet.contains in " + (end - start) + 
 "\n");
 start = System.currentTimeMillis();
 for (int i : removeData) {
 myset.remove(i);
 }
 end = System.currentTimeMillis();
 System.out.println(" IntHashSet.remove in " + (end - start));
 start = System.currentTimeMillis();
 for (int i : removeData) {
 set.remove(i);
 }
 end = System.currentTimeMillis();
 System.out.println(" HashSet.remove in " + (end - start) + "\n");
 }
 }
 
 private static int[] getAddData(Random random) {
 return getData(DATA_LENGTH, 3 * DATA_LENGTH / 2, random);
 }
 
 private static int[] getContainsData(Random random) {
 return getData(DATA_LENGTH, 3 * DATA_LENGTH / 2, random);
 }
 
 private static int[] getRemoveData(Random random) {
 return getData(DATA_LENGTH, 3 * DATA_LENGTH / 2, random);
 }
 
 private static int[] getData(int length, int maxValue, Random random) {
 int[] data = new int[length];
 
 for (int i = 0; i < length; i++) {
 data[i] = random.nextInt(maxValue + 1);
 }
 
 return data;
 }
}

com.github.coderodde.util.IntHashSetTest:

package com.github.coderodde.util;
import java.util.Random;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.Test;
public class IntHashSetTest {
 private final IntHashSet set = new IntHashSet();
 @Before
 public void beforeTest() {
 set.clear();
 }
 
 @Test
 public void removeFromCollisionChainBug() {
 bar("removeFromCollisionChainBug");
 
 set.add(0b00001); // 1
 set.add(0b01001); // 9
 set.add(0b10001); // 17
 
 set.remove(1); // remove from tail
 
 set.add(0b00001); // 1
 set.add(0b01001); // 9
 set.add(0b10001); // 17
 
 set.remove(1); // remove from head
 
 set.add(0b00001); // 1
 set.add(0b01001); // 9
 set.add(0b10001); // 17
 
 set.remove(17); // remove from middle
 
 bar("removeFromCollisionChainBug done!");
 }
 
 
 @Test
 public void removeBug() {
 bar("removeBug");
 
 for (int i = 0; i < 9; i++) 
 set.add(i);
 
 for (int i = 0; i < 9; i++) 
 set.remove(i);
 
 bar("removeBug done!");
 }
 
 @Test
 public void removeFirstMiddleLast() {
 bar("removeFirstMiddleLast");
 
 // All three ints will end up in the same collision chain:
 set.add(1); // 0b00001
 set.add(9); // 0b01001
 set.add(17); // 0b10001
 
 assertTrue(set.contains(1));
 assertTrue(set.contains(9));
 assertTrue(set.contains(17));
 
 set.remove(1);
 
 assertFalse(set.contains(1));
 assertTrue(set.contains(9));
 assertTrue(set.contains(17));
 
 set.remove(17);
 
 assertFalse(set.contains(1));
 assertTrue(set.contains(9));
 assertFalse(set.contains(17));
 
 set.remove(9);
 
 assertFalse(set.contains(1));
 assertFalse(set.contains(9));
 assertFalse(set.contains(17));
 
 bar("removeFirstMiddleLast done!");
 }
 @Test
 public void add() {
 bar("add");
 
 for (int i = 0; i < 500; i++) {
 set.add(i);
 }
 for (int i = 0; i < 500; i++) {
 assertTrue(set.contains(i));
 }
 for (int i = 500; i < 1_000; i++) {
 assertFalse(set.contains(i));
 }
 for (int i = 450; i < 550; i++) {
 set.remove(i);
 }
 for (int i = 450; i < 1_000; i++) {
 assertFalse(set.contains(i));
 }
 for (int i = 0; i < 450; i++) {
 assertTrue(set.contains(i));
 }
 
 bar("add done!");
 }
 @Test
 public void contains() {
 bar("contains");
 
 set.add(10);
 set.add(20);
 set.add(30);
 for (int i = 1; i < 40; i++) {
 if (i % 10 == 0) {
 assertTrue(set.contains(i));
 } else {
 assertFalse(set.contains(i));
 }
 }
 
 bar("contains done!");
 }
 @Test
 public void remove() {
 bar("remove");
 
 set.add(1);
 set.add(2);
 set.add(3);
 set.add(4);
 set.add(5);
 set.remove(2);
 set.remove(4);
 set.add(2);
 assertFalse(set.contains(4));
 assertTrue(set.contains(1));
 assertTrue(set.contains(2));
 assertTrue(set.contains(3));
 assertTrue(set.contains(5));
 
 bar("remove done!");
 }
 @Test
 public void clear() {
 bar("clear");
 
 for (int i = 0; i < 100; i++) {
 set.add(i);
 }
 for (int i = 0; i < 100; i++) {
 assertTrue(set.contains(i));
 }
 set.clear();
 for (int i = 0; i < 100; i++) {
 assertFalse(set.contains(i));
 }
 
 bar("clear done!");
 }
 @Test 
 public void bruteForceAdd() {
 bar("bruteForceAdd");
 
 Random random = new Random(13L);
 int[] data = new int[10_000];
 for (int i = 0; i < data.length; i++) {
 int datum = random.nextInt(5_000);
 data[i] = datum;
 set.add(datum);
 }
 for (int i = 0; i < data.length; i++) {
 assertTrue(set.contains(data[i]));
 }
 
 bar("bruteForceAdd done!");
 }
 @Test
 public void bruteForceRemove() {
 bar("bruteForceRemove");
 
 Random random = new Random(100L);
 int[] data = new int[10_000];
 for (int i = 0; i < data.length; i++) {
 int datum = random.nextInt(5_000);
 data[i] = datum;
 set.add(datum);
 }
 shuffle(data, random);
 for (int i = 0; i < data.length; i++) {
 int datum = data[i];
 if (set.contains(datum)) {
 set.remove(datum);
 if (set.contains(datum)) 
 System.out.println("found i = " + i);
 } 
 assertFalse(set.contains(datum));
 }
 
 bar("bruteForceRemove done!");
 }
 private static void shuffle(int[] data, Random random) {
 for (int i = data.length - 1; i > 0; --i) {
 final int j = random.nextInt(i + 1);
 swap(data, i, j);
 }
 }
 private static void swap(int[] data, int i, int j) {
 int tmp = data[i];
 data[i] = data[j];
 data[j] = tmp;
 }
 
 private static void bar(String text) {
 System.out.println("--- " + text);
 }
}

Performance figures:

>>> Iteration: 1/5
 IntHashSet.add in 818
 HashSet.add in 1306
 IntHashSet.contains in 150
 HashSet.contains in 321
 IntHashSet.remove in 250
 HashSet.remove in 263
>>> Iteration: 2/5
 IntHashSet.add in 607
 HashSet.add in 1130
 IntHashSet.contains in 151
 HashSet.contains in 280
 IntHashSet.remove in 179
 HashSet.remove in 203
>>> Iteration: 3/5
 IntHashSet.add in 577
 HashSet.add in 1060
 IntHashSet.contains in 159
 HashSet.contains in 292
 IntHashSet.remove in 189
 HashSet.remove in 229
>>> Iteration: 4/5
 IntHashSet.add in 522
 HashSet.add in 891
 IntHashSet.contains in 166
 HashSet.contains in 316
 IntHashSet.remove in 193
 HashSet.remove in 233
>>> Iteration: 5/5
 IntHashSet.add in 665
 HashSet.add in 940
 IntHashSet.contains in 160
 HashSet.contains in 349
 IntHashSet.remove in 199
 HashSet.remove in 232

Critique request

Please, tell me anything that comes to mind! ^^

asked Sep 2, 2021 at 2:13
\$\endgroup\$
2
  • 2
    \$\begingroup\$ I just wonder why you did not update your previous version? You can change the question and improve the code as long as it has no answers. \$\endgroup\$ Commented Sep 2, 2021 at 8:48
  • \$\begingroup\$ This one should not contain bugs. \$\endgroup\$ Commented Sep 4, 2021 at 3:04

1 Answer 1

1
\$\begingroup\$

Main Code

Readability

For me, whilst you've addressed some bugs since you're initial version it looks like the code structure within your IntHashSet has moved in a less readable direction. Whilst you started out having a contract method which was responsible for reducing the size of the table now all of this logic sits within your remove method. This makes the remove method quite large. Instead, I'd have preferred the introduction of additional methods to help break the code up.

For example, one way to remove the need for comments is to add a new method to give a piece of functionality a name. So rather than:

if (currentNode != node) {
 // Omit the node with the target integer:

Perhaps this would be more descriptive

if (isNodeToBeRemoved(currentNode, nodeToBeRemoved)) {

Continue

I generally don't mind using continue, however often the code can be rearranged to remove the need for it. Sometimes this can make the code easier to read. So, rather than:

while (currentNode != null) {
 if (currentNode == node) {
 // Omit the node with the target integer:
 currentNode = currentNode.next;
 continue;
 }
 Node nextNode = currentNode.next;
 int newTableHash = currentNode.integer & mask;
 currentNode.next = newTable[newTableHash];
 newTable[newTableHash] = currentNode;
 currentNode = nextNode;
}

Consider:

while (currentNode != null) {
 Node nextNode = currentNode.next;
 if (notNodeBeingRemoved(currentNode, nodeToRemove)) {
 int newTableHash = currentNode.integer & mask;
 currentNode.next = newTable[newTableHash];
 newTable[newTableHash] = currentNode;
 }
 currentNode = nextNode;
}

I'd probably go further and extract a method addToTable(newTable, currentNode) for:

int newTableHash = currentNode.integer & mask;
currentNode.next = newTable[newTableHash];
newTable[newTableHash] = currentNode;

Duplication

The addToTable logic above is an example of a section of duplicate logic within the code. There's also some duplication between add and contains. Having add call contains would be a simple change. There's also similar logic in remove. Adding something like a private findNodeAndPrev method which performed the search and then returned the node containing the target item and its predecessor would allow the logic to be shared between all three methods, rather than being duplicated, although this might introduce unnecessary complexity.

Testing

Test Noise

All of your tests start and end with calls to bar. This seems really tedious, error prone and adds noise to the actual test code, for very little value. Consider removing them. If you really need them to be there, then whilst I haven't tested it there seems to be a way of doing this more generically using a TestWatcher/Watchman in Junit 4. Alternately, with JUnit 5 you can create a simple extension that allows you to do this sort of thing:

@ExtendWith(IntHashSetTest.InvocationLogger.class)
public class IntHashSetTest {
 static class InvocationLogger implements BeforeEachCallback, AfterEachCallback {
 @Override
 public void afterEach(ExtensionContext extensionContext) throws Exception {
 bar(extensionContext.getDisplayName());
 }
 @Override
 public void beforeEach(ExtensionContext extensionContext) throws Exception {
 bar(extensionContext.getDisplayName() + " done!");
 }
 private static void bar(String text) {
 System.out.println("--- " + text);
 }
 }

Testing What?

I found it very difficult to tell just from the names what it was your tests were testing. Names like removeBug really don't say anything. Looking at the evolution of the code, it looks like it's testing if contracting works as expected when reducing the size of the set to 0. There's no assertions so looking at the code, it looks like it's saying can I put some stuff in, and take it back out again without an exception being thrown.

Coupling

There is a high degree of coupling between your tests and the code under test. This can be necessary, however typically the way it would work is that changes to the code break the tests where as several of your tests have hidden coupling. Some tests for boundary cases assume a known initial size for the set. If that initial size is changed then the conditions that the tests are attempting to test don't occur. So, for example removeFromCollisionChainBug assumes that all of the items added are added to the same bucket. If I doubled the initial size, this would no longer be the case. Whilst the test continues to pass, it's no longer testing the same thing. When you have tests that need to be coupled to the implementation consider making this coupling explicit, so that the test can continue to work as expected, or break. An obvious way to do that here would be to add an overloaded constructor to allow the initial tablesize to be passed into your set.

Testing too much

When tests are focused on testing one thing, they're usually more straightforward and easier to understand. Adding extra assertions might seem like a good idea, but if they're not focused on the purpose of the test then they just add noise and obscure the core intention of what's being tested.

So, in your add test for example, after you add the items and assert that they have been added to the set you then loop asserting that the next 500 numbers aren't in the set.

for (int i = 500; i < 1_000; i++) {
 assertFalse(set.contains(i));
}

Does this really add value? Why stop at 1000? The test then goes on to remove items and validate that the removal has worked... this seems like a totally different test.

answered Sep 17, 2021 at 22:06
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.