I'm trying to improve my understanding of inheritance and template classes in C++ so I coded a hash table (see compilable source code below).
I would be greatly appreciative of any comments on how to improve this code.
Description: The LinkedList
template class inherits from the template class Stack
. The Stack
class contains a member variable of type LLNode
which inherits from the template class Node
. The HashTable
class contains insert, delete and search operations. The constructor accepts a table size and two function pointers representing the hash-map and the prehash function.
#include<iostream>
#include<cmath>
template<class T>
class Node {
public:
Node(T key);
T getKey();
protected:
T key_;
};
template<class T> Node<T>::Node(T key):key_(key)
{}
template<class T> T Node<T>::getKey()
{
return key_;
}
template<class T>
class LLNode : public Node<T> {
public:
LLNode(T key);
LLNode<T>* getNext();
void setNext(LLNode<T>* next);
protected:
LLNode<T>* next_;
};
template<class T> LLNode<T>::LLNode(T key):Node<T>(key), next_(0)
{}
template<class T> LLNode<T>* LLNode<T>::getNext()
{
return next_;
}
template<class T> void LLNode<T>::setNext(LLNode<T>* next)
{
next_ = next;
}
template<class T>
class Stack {
public:
// constructor
Stack();
// destructor
virtual ~Stack();
// implements stack data structure
virtual void push(T data);
virtual void pop();
// return the number of nodes in the stack
int getSize() const;
int peek();
// wrapper functions for printing the list
void reversePrintList() const;
void printList() const;
protected:
LLNode<T>* createNode(T insertionKey);
LLNode<T>* head_;
int size_;
void reversePrintWorker(LLNode<T>*) const;
void printWorker(LLNode<T>*) const;
};
template<class T> Stack<T>::Stack():head_(0), size_(0)
{}
template<class T> Stack<T>::~Stack()
{
while(head_!=0)
{
pop();
}
}
template<class T> int Stack<T>::getSize() const
{
return size_;
}
template<class T> int Stack<T>::peek()
{
return head_->getKey();
}
template<class T> LLNode<T>* Stack<T>::createNode(T insertionKey)
{
LLNode<T>* insertionPtr = new LLNode<T>(insertionKey);
return insertionPtr;
}
template<class T> void Stack<T>::push(T data)
{
// create new node on the heap
LLNode<T>* newNode = createNode(data);
if(head_ == 0)
{
// assign the head pointer to the address of the new node
head_ = newNode;
}
else
{
// first link the new node to the first node in the list
newNode->setNext(head_);
// now assign the head pointer to the address of the first new node
head_ = newNode;
}
size_++;
}
template<class T> void Stack<T>::pop()
{
if(head_ != 0)
{
LLNode<T>* tp = head_;
head_ = head_->getNext();
delete tp;
size_--;
}
}
template<class T> void Stack<T>::reversePrintWorker(LLNode<T>* currPtr) const
{
if(currPtr != 0)
{
reversePrintWorker(currPtr->getNext());
std::cout << currPtr->getKey() << " ";
}
}
template<class T> void Stack<T>::printWorker(LLNode<T>* currPtr) const
{
if(currPtr != 0)
{
std::cout << currPtr->getKey() << " ";
printWorker(currPtr->getNext());
}
}
template<class T> void Stack<T>::reversePrintList() const
{
reversePrintWorker(head_);
}
template<class T> void Stack<T>::printList() const
{
//if(head_ != 0)
printWorker(head_);
}
template<class T>
class LinkedList : public Stack<T> {
public:
// deletes node at index 0 <= i <= size - 1
void deleteNode(int i);
// returns the index of the node with a given key
int indexOfKey(T key);
// return the key of a node at index i
int getData(int i) const;
};
template<class T> void LinkedList<T>::deleteNode(int idx)
{
if(idx == 0)
{
this->pop();
}
else
{
// declare traversal pointer and assign to head
LLNode<T>* tp = this->head_;
// declare previous pointer
LLNode<T>* pp = 0;
for(int i = 0; i < idx; i++)
{
// assign previous pointer to address of current node
pp=tp;
// advance traversal pointer
tp = tp->getNext();
}
pp->setNext( tp->getNext() );
delete tp;
(this->size_)--;
}
}
template<class T> int LinkedList<T>::getData(int idx) const
{
// declare traversal pointer and assign to head
LLNode<T>* tp = this->head_;
// iterate traversal pointer through the linked list
for(int i = 0; i < idx; i++) tp = tp->getNext();
// access traversal pointer's data
return tp->getKey();
}
template<class T> int LinkedList<T>::indexOfKey(T targetKey)
{
// declare traversal pointer and assign to head
LLNode<T>* tp = this->head_;
for(int i=0; i < this->size_; i++)
{
// check if the traversal pointer is pointing to a node which contains the key
if( tp->getKey() == targetKey ) return i;
// advance traversal pointer
tp = tp->getNext();
}
return -1;
}
class HashTable {
public:
// constructor
HashTable(int tableSize,
int (*prehash)(std::string key),
int (*hashmap)(int prehashedKey, int tableSize));
// dictionary operations
const bool search(std::string key) const;
void insert(std::string key);
void del(std::string key);
void printTable() const;
private:
// function pointers for prehash and hash map
int (*prehash_)(std::string key);
int (*hashmap_)(int prehashedKey, int m);
int tableSize_;
LinkedList<int>* table_;
};
HashTable::HashTable(int tableSize,
int (*prehash)(std::string key),
int (*hashmap)(int prehashedKey, int m))
:tableSize_(tableSize)
{
table_ = new LinkedList<int>[tableSize];
prehash_ = prehash;
hashmap_ = hashmap;
}
const bool HashTable::search(std::string key) const
{
// compute the slot associated with the key
int prehashedKey = (*prehash_)(key);
int slot = (*hashmap_)(prehashedKey, tableSize_);
// search the linked list for prehashedKey
int idx = table_[slot].indexOfKey(prehashedKey);
bool isThere = false;
if(idx != -1) isThere = true;
return isThere;
}
void HashTable::insert(std::string key)
{
// compute the slot associated with the key
int prehashedKey = (*prehash_)(key);
int slot = (*hashmap_)(prehashedKey, tableSize_);
// search the linked list for prehashedKey
int idx = table_[slot].indexOfKey(prehashedKey);
if(idx == -1)
{
// push the item into the linked list
table_[slot].push(prehashedKey);
}
else
{
std::cout << "Hash table already contains target key.\n";
}
}
void HashTable::del(std::string key)
{
// compute the slot associated with the key
int prehashedKey = (*prehash_)(key);
int slot = (*hashmap_)(prehashedKey, tableSize_);
// search the linked list for prehashedKey
int idx = table_[slot].indexOfKey(prehashedKey);
if(idx != -1)
{
// delete node at index idx
table_[slot].deleteNode(prehashedKey);
}
else
{
std::cout << "Hash table does not contain target key.\n";
}
}
void HashTable::printTable() const
{
for(int i=0; i < tableSize_; i++)
{
table_[i].printList();
std::cout << std::endl;
}
}
int prehash(std::string key)
{
int len = key.length();
int prehashedKey = 0;
for(int i = 0; i < len;i++) prehashedKey += ( (int)key[i] - 96 ) * pow(26,len-1-i);
return prehashedKey;
}
int hashmap(int prehashedKey, int tableSize)
{
return prehashedKey % tableSize;
}
int main()
{
// initializations
int NUMSLOTS = 21;
int NUMNAMES = 20;
std::string names[] =
{ "james", "jill", "eric", "sam", "alex", "sarah", "kate", "renee", "david", "jenny",
"felix", "henry", "peter", "alexis", "jing", "alfred", "anthony", "jeremy", "thomas", "ben"};
// fill table
HashTable myHashTable(NUMSLOTS,prehash,hashmap);
for(int i=0; i<NUMNAMES; i++) myHashTable.insert(names[i]);
// Print contents of hash table
myHashTable.printTable();
}
4 Answers 4
Design Review
Not sure you need both Node
and LLNode
they seem to be the same thing.
Style Review
If a function is only one line do it in-line in the class rather than breaking it out.
Code Review
Use References
Return by reference when you can (to avoid a copy). If you don't want the user modifying it then use const reference.
Node(T key);
// Pass key by reference. Otherwise you are copying it to the
// parameter then you are copying it to the member.
//
// If you can pass by r-value reference so you can move it into
// the objext
So I would have done:
Node(T const& key) // Copy constructor.
: key_(key) // but only one copy
{}
Node(T&& key) // Move constructor.
: key_(std::forward<T>(key))
{}
When retrieving the key you return by value. This means you are going to make a copy of the key to return. I assume you don't want people modiying the key so you should return by const reference.
T getKey();
I would have done:
T const& getKey() const { return key_;}
^^^^^^^^ return const reference
^^^^^ this method does not change state
RAII
When you play with pointers you need to define the ownership. If a class owns a pointer then you need to make sure that it correctly handles that pointer when being copied and assigned. To do this you need to obey the rule of three (rule of five).
Basically this means you need to define destructor/copy constructor/copy assignment operator (for rule of three) add move constructor/move assignment operator (for rule of five).
Now defining does not mean it needs to do anything you can delete it (or make it undefined) so it can not be used if that is appropriate. But you must think about it and define it.
Stack: This class owns a pointer:
LLNode<T>* head_;
But I don't see a copy or move operators. Thus I am sure this will not work correctly (because the compiler will generate default versions).
{
stack<int> x;
x.push(5);
stack<int> y(x)
} // Code will break here.
Top
This is usually called top()
template<class T> int Stack<T>::peek()
{
return head_->getKey();
}
Also it should be returning a T
(the type being stored). Though you should return T by reference.
Is there really a need for this function?
template<class T> LLNode<T>* Stack<T>::createNode(T insertionKey)
{
LLNode<T>* insertionPtr = new LLNode<T>(insertionKey);
return insertionPtr;
}
It looks like the just a call to the constructor. Why not just call that constructor.
If you modify the constructor of LLNode
to pass the next pointer then you can really simplify the push.
I would write it like this:
template<class T> void Stack<T>::push(T const& data)
{
head = new LLNode<T>(data, head);
size_++;
}
template<class T> void Stack<T>::push(T&& data)
{
head = new LLNode<T>(std::forward<T>(data), head);
size_++;
}
Sure you need a print. But why limit it to printing to std::cout. Sure you can make that the default but you should be able to print to any stream. You can also add an output operator to allow you to use <<
to print the stream.
template<class T> void Stack<T>::reversePrintWorker(LLNode<T>* currPtr, std::ostream& stream = std::cout) const;
template<class T> void Stack<T>::printWorker(LLNode<T>* currPtr, std::ostream& stream = std::cout) const;
std::ostream& operator<<(std::ostream& str, Stack<T> const& data) {
str << "[";
data.printWorker(currPtr, str);
str << "]";
return str;
}
Not sure why you did this:
(this->size_)--;
Just do:
--size_;
This be should returning a T
.
template<class T> int LinkedList<T>::getData(int idx) const
Hashing
Not sure I would require two functions for hashing. If its a good hash you should just be able to do the modulus of the number of slots. You should also provide a default hash function. Most people don't know how to write hash function and making them do so will cause lots of problems.
Using C function pointers for the functions is also a bit old school (and prevents optimization). Use functors (or lambdas (fancy functors)).
Always use the initializer list rather than setting the values in the body of the constructor.
-
\$\begingroup\$ I used
(this->size_)--
instead ofsize_--
inLinkedList
because it is inherited from the template classStack
. I believe this is one of the idiosyncrasies of inheritance with templates. \$\endgroup\$user11881– user118812015年12月12日 22:55:50 +00:00Commented Dec 12, 2015 at 22:55 -
\$\begingroup\$ I implemented many of your suggestions in the
Stack
andLinkedList
template classes. On further reflection, I'm wondering if you would recommend simply eliminating the raw pointers altogether and usingstd::unique_ptr<LLNode>
instead. Presumably this would obviate the need for copy constructor/assignment operator. \$\endgroup\$user11881– user118812015年12月13日 01:19:51 +00:00Commented Dec 13, 2015 at 1:19 -
\$\begingroup\$ @user11881: Yep that is a perfectly acceptable. \$\endgroup\$Loki Astari– Loki Astari2015年12月13日 20:57:09 +00:00Commented Dec 13, 2015 at 20:57
Focusing mainly on the hash-table:
The main reason for using a hash-table-like data structure is to get constant-time (or near constant) lookup. When you decided to use a linked list for the backing-store of your hash-table, you lost that benefit. indexOfKey()
is linear time. I would just throw that list out and use a std::vector
as the backing store. Then there are several strategies you can use to resolve collisions. You can find an overview of each on Wikipedia.
If you're interested in leaning templates, use them for the table. Why limit it to integers only? The key type in a hash-table is often a string, but again, you don't have to be limited to that, the key type could also be templated.
I question the necessity for the hashmap_
function pointer and this:
int hashmap(int prehashedKey, int tableSize)
{
return prehashedKey % tableSize;
}
Is there any other way to index into a hash-table other than the modulo operator? Not that I know of. I mean, you could use key & (size - 1)
if the size is known to be power-of-two, but that's a micro optimization that the compiler is probably already doing. Remove that extra member function pointer and use %
directly. It will make your code clearer and faster.
The hash function, which you call prehash_
, could also be a template parameter. Take a leaf out of the C++ Standard Library's book, make it compatible with std::hash
and you won't even have to provide a custom hasher for common types like integers and strings.
All in all, your HashTable
should be looking something like this:
template
<
class Key,
class Val,
class Hasher = std::hash<Key>
>
class HashTable
{
public:
// your interface...
private:
std::vector<Val> buckets;
};
Since the hasher is resolved at compiler time, you don't even need to store a copy of it, you can instantiate directly when applying it on a key:
static std::size_t hashOf(const Key & key)
{
return Hasher{}(key);
}
Other general details and guidelines:
When you're taking a potentially large object, such as a
std::string
, in a function parameter, and the function is only looking at the object without copying it, pass by const reference to avoid the copy. E.g.:bool search(const std::string & key) { ... ^^^^^ ^
Simple types like integers can be passed by value, of course. The hardware has built-in registers for those.
const
on a return type (const bool search(std::string key) const
) is meaningless at best, but can prevent optimal code from being generated if applied to objects. Don't get into the habit of doing that.Node
andLLNode
seems like gratuitous obfuscation. Just makeNode
an inner private class of the linked list. No need at all for it to be a public type. Also, just leavenext
publicly visible. Having aget/set
pair is just a verbose way of defining a public variable.Don't assign zero to pointers (like in here
LLNode<T>* pp = 0;
), not anymore, at least. Now there's thenullptr
literal (since C++11).
-
\$\begingroup\$ Can you please explain why the curly brackets are required in the line
return Hasher{}(key);
. I found that removing them leads to a compile error. \$\endgroup\$user11881– user118812015年12月13日 01:11:57 +00:00Commented Dec 13, 2015 at 1:11 -
\$\begingroup\$ @user11881, sorry, my bad, should have commented it.
std::hash
is not really a function, but a class (what is called a function object or functor), so we have to create an instance of it before calling theoperator ( )
. That's whatHasher{}
is doing. C++11 allows both{}
and()
to declare an instance of an object, so that line is also equivalent toHasher()()
, but I though the repeated ()() would look even weirder. So that line is basically declaring an objectHasher{}
and callingoperator(key)
in one line. Does that make sense to you? \$\endgroup\$glampert– glampert2015年12月13日 02:52:26 +00:00Commented Dec 13, 2015 at 2:52
Regarding class hierarchy and inheritance:
I'm not really convinced about the reasoning why you need to have that extra LLNode<T>
derived from Node<T>
instead of just LLNode<T>
and simply merge these classes.
An indication this seems useless, is that you never use Node<T>
anywhere else, but just for inheritance.
template<class T>
class LinkedList : public Stack<T> {
I don't understand this. A Stack
might be a specialization of a LinkefList
, not vice versa.
It's not the point for OOP design, to put smallest pieces of data structure operation policies into base classes, and derive just straight forward up to the final class structure.
What you want is guaranteed interfaces regarding several aspects, but keep them in separate constraints and base classes.
To detect constraint violations early, use SFINAE techniques.
Also you might want to get informed about mixins and the CRTP.
-
\$\begingroup\$ My reason for defining a separate
Node
base class is so I can inherit different types of nodes from it. For example, singly linked list node, doubly linked list node, BST node etc. I'm inheritingLinkedList
fromStack
because myLinkedList
adds functionality to a Stack and thus implements an "is-a" relation. Perhaps the naming is confusing. \$\endgroup\$user11881– user118812015年12月12日 20:27:23 +00:00Commented Dec 12, 2015 at 20:27 -
\$\begingroup\$ @user11881 "Perhaps the naming is confusing." Sure, that was one of my main points about this. Anyways I don't see so much points about evolving a class hierarchy the way you're doing it. A better technique to add functionality might be to use mixin base classes (probably as CRTP to avoid dynamic polymorphism), instead of such deep inheritance hierarchy. I still don't see the point of
Node
either, single LL nodes and double LL nodes need to be handled very different, and the only value you get from it, is just saving to type theKey
stuff in derived classes. I doubt it's worth it. \$\endgroup\$πάντα ῥεῖ– πάντα ῥεῖ2015年12月12日 20:42:00 +00:00Commented Dec 12, 2015 at 20:42
Code changes
I changed
HashTable
to a template class as suggested.The hashmap constructor argument is now an optional argument with default value given by the unsigned modulo operator.
I have also implemented table doubling/halving.
I found comparable performance with
std::unordered_set<std::string>
by inserting 58,110 english words.
Results
Inserting into hash table...
Insertion time: 0.132802
Load factor: 0.886688
Number of buckets: 65536
Longest chain: 7
Inserting into unordered_set...
Insertion time: 0.095464
Load factor: 0.564849
Longest chain: 6
// a basic hash table class where the items are equivalent to the keys
static int defaultMap(int val, int mod)
{
// always non-negative
return ((val % mod) + mod) % mod;
}
template<class Key, class Prehasher = std::hash<Key> >
class HashTable {
public:
// constructor
HashTable(int tableSize = 1, int (*hashmap)(int val, int mod) = defaultMap);
~HashTable() { delete[] table_; }
// dictionary operations
bool search(const Key& key) const;
void insert(const Key& key);
void del(const Key& key);
void printTable() const;
double load_factor() const { return (double)numElements_/tableSize_; }
int bucket_count() const { return tableSize_; }
int bucket_size(int i) const { return table_[i].getSize(); }
// rebuilds table with size m
void rebuild(int m);
private:
void insertPrivate(const Key& key);
// prehash function
int prehashOf(const Key& key) const { return Prehasher()(key); };
// function pointer for hash map
int (*hashmap_)(int phKey, int m);
int tableSize_;
int numElements_;
LinkedList<Key>* table_;
};
template<class Key, class Prehasher> HashTable<Key, Prehasher>::HashTable(int tableSize,
int (*hashmap)(int phKey, int m))
: hashmap_(hashmap), tableSize_(tableSize)
{
table_ = new LinkedList<Key>[tableSize];
}
template<class Key, class Prehasher> void HashTable<Key, Prehasher>::rebuild(int m)
{
// create an array on the heap to store the new table of size m
LinkedList<Key>* newTable = new LinkedList<Key>[m];
// create a pointer to the old table
LinkedList<Key>* oldTable = table_;
int oldSize = tableSize_;
// resassign member variable of HashTable
table_ = newTable;
tableSize_ = m;
// fill the new table
for(int i=0; i<oldSize; i++)
{
while( oldTable[i].getSize()!=0 )
{
insertPrivate( oldTable[i].top() );
oldTable[i].pop();
}
}
delete[] oldTable;
}
template<class Key, class Prehasher> bool HashTable<Key, Prehasher>::search(const Key& key) const
{
// compute the slot associated with the key
int phKey = prehashOf(key);
int slot = (*hashmap_)(phKey, tableSize_);
// search the linked list for phKey
int idx = table_[slot].indexOfKey(key);
bool isThere = false;
if(idx != -1) isThere = true;
return isThere;
}
template<class Key, class Prehasher>void HashTable<Key, Prehasher>::insertPrivate(const Key& key)
{
int phKey = prehashOf(key);
int slot = (*hashmap_)(phKey, tableSize_);
// push the item into the linked list
table_[slot].push(key);
}
template<class Key, class Prehasher>void HashTable<Key, Prehasher>::insert(const Key& key)
{
if(numElements_+1 > tableSize_) rebuild(2*tableSize_);
// compute the slot associated with the key
int phKey = prehashOf(key);
int slot = (*hashmap_)(phKey, tableSize_);
// search the linked list for phKey
int idx = table_[slot].indexOfKey(key);
if(idx == -1)
{
// push the item into the linked list
table_[slot].push(key);
++numElements_;
}
else
{
std::cout << "Hash table already contains target key " << key << "\n";
}
}
template<class Key, class Prehasher> void HashTable<Key, Prehasher>::del(const Key& key)
{
if(numElements_-1 < tableSize_/4) rebuild(tableSize_/2);
// compute the slot associated with the key
int phKey = prehashOf(key);
int slot = (*hashmap_)(phKey, tableSize_);
// search the linked list for phKey
int idx = table_[slot].indexOfKey(key);
if(idx != -1)
{
// delete node at index idx
table_[slot].deleteNode(idx);
--numElements_;
}
else
{
std::cout << "Hash table does not contain target key.\n";
}
}
template<class Key, class Prehasher> void HashTable<Key, Prehasher>::printTable() const
{
for(int i=0; i < tableSize_; i++)
{
std::cout << i << ": ";
table_[i].printList();
std::cout << std::endl;
}
}
Explore related questions
See similar questions with these tags.