Fast hash table based set of pointers

Question 1

I am trying to make a very fast hash table based set of pointers. It is supposed to be as fast as possible, not conforming to standard C++ at times. This is always marked in the code. Collisions are resolved by linear probing. The maximum size is known at construction time and memory grows by multiples of 2. All memory is pre-allocated. The set should perform only insertions. No other functionality is needed.

Here is my code:

#ifndef HASH_TABLE_H
#define HASH_TABLE_H
#include <vector>
#include <cstdint>
#include <cstring>
#include <algorithm>
#include "range_checking.h" // contains Positive<int> a simple range checking utility
//>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
template <typename TItem>
class HashTable
{
public: //---------------------------------------------------------------------
 HashTable (Positive<int> maxSize, Positive<int> probableSize = 32);
 void reset(Positive<int> probableSize);
 bool insert(const TItem *address);
 int size() const;
 long nCollisions() const;
private: //--------------------------------------------------------------------
 typedef std::vector<const TItem*> TArray;
 std::vector<TArray> arrays_;
 int operatingArray_;
 uintptr_t computeIndex(const TItem* address) const;
 void expand();
 bool performInsertion(const TItem *address);
 Real maxLoadFactor_;
 int currentArraySize_;
 int nItems_;
 int maxItems_;
 long nCollisions_ = 0;
};
unsigned fastCeilLog2(unsigned argument);
unsigned fastModuloPowerOfTwo(unsigned argument, unsigned powerOfTwo);
//>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
//*****************************************************************************
// Implementations:
//*****************************************************************************
//=============================================================================
template <typename TItem>
HashTable<TItem>::HashTable(Positive<int> maxSize, Positive<int> probableSize):
 operatingArray_(0),
 maxLoadFactor_(0.5),
 currentArraySize_(1),
 nItems_(0),
 maxItems_(0)
{
 do
 {
 currentArraySize_ *= 2;
 arrays_.emplace_back(currentArraySize_, nullptr);
 }
 while (currentArraySize_ < maxSize / maxLoadFactor_);
 reset(probableSize);
}
//=============================================================================
//=============================================================================
template <typename TItem>
void HashTable<TItem>::reset(Positive<int> probableSize)
{
 /// The following line is nonstandard and should be:
 /// std::fill(arrays_[operatingArray_].begin(),
 /// arrays_[operatingArray_].end(), nullptr);
 std::memset(&(arrays_[operatingArray_][0]),0,
 arrays_[operatingArray_].size()*sizeof(int*));
 maxItems_ = probableSize / maxLoadFactor_;
 operatingArray_ = fastCeilLog2(maxItems_) - 1;
 currentArraySize_ = arrays_[operatingArray_].size();
 nItems_ = 0;
}
//=============================================================================
//=============================================================================
template <typename TItem>
bool HashTable<TItem>::insert(const TItem* address)
{
 if (nItems_ + 1 > maxItems_)
 {
 expand();
 return insert(address);
 }
 bool inserted = performInsertion(address);
 if (inserted)
 ++nItems_;
 return inserted;
}
//=============================================================================
//=============================================================================
template <typename TItem>
int HashTable<TItem>::size() const
{
 return nItems_;
}
//=============================================================================
//=============================================================================
template <typename TItem>
long HashTable<TItem>::nCollisions() const
{
 return nCollisions_;
}
//=============================================================================
//=============================================================================
template <typename TItem>
uintptr_t HashTable<TItem>::computeIndex(const TItem* address) const
{
 uintptr_t index = reinterpret_cast<uintptr_t>(address); /// Nonstandard
 index >>= 5; /// Ad hoc
 index = fastModuloPowerOfTwo(index, currentArraySize_);
 return index;
}
//=============================================================================
//=============================================================================
template <typename TItem>
void HashTable<TItem>::expand()
{
 TArray& lastArray = arrays_[operatingArray_];
 ++operatingArray_;
 currentArraySize_ = arrays_[operatingArray_].size();
 maxItems_ = maxLoadFactor_ * currentArraySize_;
 for (auto i : lastArray)
 performInsertion(i);
 /// The following line is nonstandard and should be:
 /// std::fill(lastArray.begin(), lastArray.end(), nullptr);
 std::memset(&lastArray[0], 0, lastArray.size()*sizeof(int*));
}
//=============================================================================
//=============================================================================
template <typename TItem>
bool HashTable<TItem>::performInsertion(const TItem *address)
{
 uintptr_t index = computeIndex(address);
 const TItem* insertionSpot = arrays_[operatingArray_][index];
 while (insertionSpot != nullptr)
 {
 ++nCollisions_;
 if (insertionSpot == address)
 return false;
 ++index;
 index = fastModuloPowerOfTwo(index, currentArraySize_);
 insertionSpot = arrays_[operatingArray_][index];
 }
 arrays_[operatingArray_][index] = address;
 return true;
}
//=============================================================================
//=============================================================================
inline unsigned fastCeilLog2(unsigned argument)
{
 unsigned logarithm = 1;
 while (argument >>= 1)
 ++logarithm;
 return logarithm;
}
//=============================================================================
//=============================================================================
inline unsigned fastModuloPowerOfTwo(unsigned argument, unsigned powerOfTwo)
{
 argument &= (powerOfTwo-1);
 return argument;
}
//=============================================================================
#endif // HASH_TABLE_H

I am having problems with collisions.

I made a benchmark that reserves 32 items and feeds data of various sizes and overlaps into the table, letting it grow naturally. Then it is cleared, reserved again and the same data is fed into it again. The times for 10 runs are summed and the same is done with std::unordered_set of gcc 4.8. The ratios and the average number of collisions for my hashtable are presented in this document.

I am concerned about:

The number of collisions: Should there be so many collisions? Is it normal that to feed 1600 items with 0.3 overlap (30% of the items are duplicates) results in 16 000 collisions?

The irregularity in the collision data, For example:

1600 items, 0% overlap => cca 1700 collisions

3200 items, 0% overlap => cca 1 700 000 collisions

How can I improve my hash table?
Could you give me general code assesment?
Could you explain the strange collision data and the great number of collisions? Is this normal?
How can I reduce the number of collisions?

I made this hashtable just by some ad-hoc experimentation and reading the Wikipedia article and it is the first time I made a hash table. So I am very inexperienced and humble, welcoming any advice at all.

Question 2

Never design your own hashing algorithm (mathematically it is very complex to get correct to avoid collisions). But a hint. Use prime numbers in its generation will help with preventing collisions.

Question 3

You do realize that C++11 has std::unordered_set which is basically a hash table.

Question 4

Read this: Good hash algorithm for list of (memory) addresses

Question 5

powers of 2 may be faster. But they are not a good technique for getting non clashing hashes. That is why you use primes to reduce the clashes (its a property of prime that helps in this. See your local maths text book for details).

Question 6

As a HashTable with max 4 billion elements grows approx 20 times until it is filled up completely (starting at approx 1000 elements), dynamically growing the array isn't much of a problem: It hardly ever happens. @MartinDrozdik: Your hash function is just way to bad, and that's why you end up in so many collisions. Remember: You have to fit 2^32 numbers in a much smaller range. Of course it's hard to find a good hash. Prime numbers are a good point to start. You might find <address> modulo <primenumber> good enough.

Question 7

Find it hard to believe that std::fill is slower than std::memeset

/// The following line is nonstandard and should be:
/// std::fill(arrays_[operatingArray_].begin(),
/// arrays_[operatingArray_].end(), nullptr);
std::memset(&(arrays_[operatingArray_][0]),0,
 arrays_[operatingArray_].size()*sizeof(int*));

If you are in debug mode then you may see a difference but with optimizations ON seems unlikely. If there is a difference they you should add that as a comment to explain. Otherwise it is not non standard to use std::memset I would just add a comment on why.

This seems perfectly valid to me:

uintptr_t index = reinterpret_cast<uintptr_t>(address);

The uintptr_t is specifically designed to hold a pointer without loss of data. Using reinterpret_cast<>() is valid to indicate the danger of the conversion. But it is a valid conversion.

Again I see nothing unstandard about:

/// The following line is nonstandard and should be:
/// std::fill(lastArray.begin(), lastArray.end(), nullptr);
std::memset(&lastArray[0], 0, lastArray.size()*sizeof(int*));

But I would add a comment on why you chose std::memset().

No need for the recursive call here:

if (nItems_ + 1 > maxItems_)
{
 expand();
 return insert(address);
}

I would just let it fall through. If there is a possibility that expand() does not grow it enough then use a loop rather than recursion. That will make it easier for the compiler to unroll and do other optimizations.

Now what I would consider a good hashing function:

uintptr_t HashTable<TItem>::computeIndex(const TItem* address) const

Question 8

I just checked the implementation of std::hash and it seems to be the same as mine: return reinterpret_cast<size_t>(__p); The performance difference may be caused by not using primes as you suggested.

Question 9

The reason why I marked the memsets as nonstandard is because I am comparing the result of a memset to nullptr, which does not have to be represented by all zeros according to the standard.

Question 10

I changed the bitshift to index >>= 4; and the maxLoadFactor_ to 0.4 and I started to get consistent results. There are still very many collisions, but it started outperforming std::unordered_set. But this would probably change on a 32 bit machine. Maybe I could try quadratic probing instead of linear.

Question 11

Anyhow thank you very much for looking into my source code. I feel much more confident now.

Loki Astari Loki Astari 97.7k5 gold badges126 silver badges341 bronze badges · Accepted Answer · 2013-10-07 12:14:07Z

Find it hard to believe that std::fill is slower than std::memeset

/// The following line is nonstandard and should be:
/// std::fill(arrays_[operatingArray_].begin(),
/// arrays_[operatingArray_].end(), nullptr);
std::memset(&(arrays_[operatingArray_][0]),0,
 arrays_[operatingArray_].size()*sizeof(int*));

If you are in debug mode then you may see a difference but with optimizations ON seems unlikely. If there is a difference they you should add that as a comment to explain. Otherwise it is not non standard to use std::memset I would just add a comment on why.

This seems perfectly valid to me:

uintptr_t index = reinterpret_cast<uintptr_t>(address);

The uintptr_t is specifically designed to hold a pointer without loss of data. Using reinterpret_cast<>() is valid to indicate the danger of the conversion. But it is a valid conversion.

Again I see nothing unstandard about:

/// The following line is nonstandard and should be:
/// std::fill(lastArray.begin(), lastArray.end(), nullptr);
std::memset(&lastArray[0], 0, lastArray.size()*sizeof(int*));

But I would add a comment on why you chose std::memset().

No need for the recursive call here:

if (nItems_ + 1 > maxItems_)
{
 expand();
 return insert(address);
}

I would just let it fall through. If there is a possibility that expand() does not grow it enough then use a loop rather than recursion. That will make it easier for the compiler to unroll and do other optimizations.

Now what I would consider a good hashing function:

uintptr_t HashTable<TItem>::computeIndex(const TItem* address) const

I just checked the implementation of std::hash and it seems to be the same as mine: return reinterpret_cast<size_t>(__p); The performance difference may be caused by not using primes as you suggested.
The reason why I marked the memsets as nonstandard is because I am comparing the result of a memset to nullptr, which does not have to be represented by all zeros according to the standard.
I changed the bitshift to index >>= 4; and the maxLoadFactor_ to 0.4 and I started to get consistent results. There are still very many collisions, but it started outperforming std::unordered_set. But this would probably change on a 32 bit machine. Maybe I could try quadratic probing instead of linear.
Anyhow thank you very much for looking into my source code. I feel much more confident now.

Stack Exchange Network

Fast hash table based set of pointers

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Fast hash table based set of pointers

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions