I created a templated Hash Map class in C++, which supports most of the basic C++ types (e.g. int
, float
, char
...). I would like some feedback regarding my implementation and what is bad and can be improved.
#ifndef _LIB_HASHMAP
#define _LIB_HASHMAP
#include <exception>
#include <string>
namespace lib{
namespace details{
unsigned int generate_hash(const int& key, const size_t& hash_map_size){
return key%hash_map_size;
}
unsigned int generate_hash(const std::string& key, const size_t& hash_map_size){
return key.size()%hash_map_size;
}
unsigned int generate_hash(const char* key, const size_t& hash_map_size){
size_t position = 0;
for (size_t i=0; key[i]!='0円'; i++){
position += key[i];
}
return position%hash_map_size;
}
}
template <typename T, typename U>
class hash_map{
private:
struct hash_entry{
T key;
U value;
hash_entry(){}
hash_entry(const T& _key, const U& _value)
: key(_key), value(_value){}
};
hash_entry* hash_entries;
unsigned int* filled_positions;
unsigned int hash_map_size;
unsigned int num_entries;
public:
typedef const hash_entry* const_iterator;
typedef hash_entry* iterator;
explicit hash_map(unsigned int);
~hash_map();
iterator begin();
iterator end();
const_iterator cbegin();
const_iterator cend();
unsigned int size();
void add(const T&, const U&);
U& operator[](const T&);
};
template <typename T, typename U>
hash_map<T,U>::hash_map(unsigned int _size):
hash_entries(new hash_entry[_size]),
filled_positions(new unsigned int[_size]()),
hash_map_size(_size),
num_entries(0){
}
template <typename T, typename U>
hash_map<T,U>::~hash_map(){
delete[] filled_positions;
delete[] hash_entries;
}
template <typename T, typename U>
typename hash_map<T,U>::const_iterator hash_map<T,U>::cbegin(){
return hash_entries;
}
template <typename T, typename U>
typename hash_map<T,U>::const_iterator hash_map<T,U>::cend(){
return hash_entries+hash_map_size;
}
template <typename T, typename U>
typename hash_map<T,U>::iterator hash_map<T,U>::begin(){
return hash_entries;
}
template <typename T, typename U>
typename hash_map<T,U>::iterator hash_map<T,U>::end(){
return hash_entries+hash_map_size;
}
template <typename T, typename U>
unsigned int hash_map<T,U>::size(){
return hash_map_size;
}
template <typename T, typename U>
void hash_map<T,U>::add(const T& _key, const U& _value){
if (num_entries >= hash_map_size){
throw std::out_of_range("std::out_of_range : hash_map full");
}
unsigned int position = details::generate_hash(_key,hash_map_size);
while (filled_positions[position]){
position = (position+1)%hash_map_size;
}
filled_positions[position] = 1;
hash_entries[position] = hash_entry(_key,_value);
num_entries += 1;
}
template <typename T, typename U>
U& hash_map<T,U>::operator[](const T& _key){
unsigned int position = details::generate_hash(_key,hash_map_size);
unsigned int count = 0;
while (hash_entries[position].key != _key){
if (count > hash_map_size){
throw std::invalid_argument("std::invalid_argument : key not in hash_map");
}
position = (position+1)%hash_map_size;
count += 1;
}
return hash_entries[position].value;
}
}
#endif // _LIB_HASHMAP
1 Answer 1
Although the description claims that most C++ types are supported, I see only int
and two kinds of string. The std::hash
class is specialized for quite a few more types than that:
template<> struct hash<bool>;
template<> struct hash<char>;
template<> struct hash<signed char>;
template<> struct hash<unsigned char>;
template<> struct hash<char16_t>;
template<> struct hash<char32_t>;
template<> struct hash<wchar_t>;
template<> struct hash<short>;
template<> struct hash<unsigned short>;
template<> struct hash<int>;
template<> struct hash<unsigned int>;
template<> struct hash<long>;
template<> struct hash<long long>;
template<> struct hash<unsigned long>;
template<> struct hash<unsigned long long>;
template<> struct hash<float>;
template<> struct hash<double>;
template<> struct hash<long double>;
template< class T > struct hash<T*>;
In addition, the std::string
hash is very collision-prone, as it uses only the length of the string (and the char*
version doesn't even use std::strlen()
- are you intending to even reinvent that wheel?).
std::size_t
is missing its necessary include (usually <cstdlib>
) and is consistently misspelt.
We need to include <stdexcept>
for std::out_of_range
and std::invalid_argument
.
The compiler-generated copy constructor won't do what we want with hash_entries
or filled_positions
- that's dangerous, and will lead to multiple objects believing they own the pointers, and double deletion.
cbegin()
and cend()
functions ought to be usable on a const hash_map
(and begin()
and end()
ought to be overloaded suitably, too). Other const methods are lacking (such as find()
) which make const maps much less useful than they ought to be.
-
1\$\begingroup\$ You can also add that
size()
should beconst
and that aconst
version ofoperator[]
could be provided (plus a way to remove entries). If c++17 is an option,std::string_view
could be considers as parameter type for key. \$\endgroup\$Calak– Calak2018年12月04日 19:04:30 +00:00Commented Dec 4, 2018 at 19:04
T
andU
instead ofK
for key andV
for value? \$\endgroup\$int
and the%hash_map_size
would be done in your class on the returned value. \$\endgroup\$char*
hashed based on its content whilestd::string
is hashed only based on its size? \$\endgroup\$