I wrote my own implementation of generic hash table. I’m using linked list to resolve collision problem. I want to share my code with you and get your opinion about it, and if there are any advice or optimisations that you can point to.
HashTable.h file
/**
* @author Abdelhakim RAFIK
* @version v1.0.1
* @license MIT License
* @Copyright Copyright (c) 2021 Abdelhakim RAFIK
* @date Feb 2021
*/
#include <stdlib.h>
#include <string.h>
/* hash table node definition */
typedef struct _node
{
unsigned char *key;
void *value;
struct _node *next;
} ht_node_t;
/* hash table definition */
typedef struct
{
unsigned int size;
unsigned int count;
ht_node_t **items;
} ht_table_t;
/**
* create new hash table with given size
* @param size items table size
* @return pointer to creates hash table
*/
ht_table_t* ht_create(int size);
/**
* create items table index from given key
* @param key key value
* @return index of items table between 0 and table size-1
*/
unsigned int ht_hash(unsigned int maxSize, unsigned char *key);
/**
* inset new element to hash table by given key
* @param table pointer to created hash table
* @param key key for value to insert
* @param value element to be inserted
* @param size size of element in bytes
* @return 1: inserted successfully
* 0: error occured while inserting value
*/
unsigned char ht_insert(ht_table_t* table, unsigned char *key, void *value, size_t size);
/**
* find associated node to given key
* @param table pointer to hash table
* @param key key associated to the node
* @param index index of found node in items table
* @param prev pointer to previous node
* @return node pointer if found or NULL otherwise
*/
ht_node_t* ht_find_node(ht_table_t* table, unsigned char *key, unsigned int *index, ht_node_t **prev);
/**
* find value in hash table by given key
* @param table pointer to hash table
* @param key key of the value searching
*/
void* ht_find(ht_table_t* table, unsigned char *key);
/**
* delete value by given key
* @param table pointer to hash table
* @param key value's key to delete
* @return 1: deleted successfully
* 0: error occured while deleting the value
*/
unsigned char ht_delete(ht_table_t* table, unsigned char *key);
/**
* delete all table items
* @param table pointer to hash table
*/
unsigned char ht_delete_all(ht_table_t* table);
/**
* free allocated memory for given hash table
* @param table pointer to hash table
*/
unsigned char ht_free(ht_table_t* table);
HashTable.c file
/**
* @author Abdelhakim RAFIK
* @version v1.0.1
* @license MIT License
* @Copyright Copyright (c) 2021 Abdelhakim RAFIK
* @date Feb 2021
*/
#include "hashTable.h"
/**
* create new hash table with given size
* @param size items table size
* @return pointer to creates hash table
*/
ht_table_t* ht_create(int size)
{
// check given table items size, should be greather than 0
if(size <= 0) return NULL;
// create new hash table
ht_table_t* table = (ht_table_t*) malloc(sizeof(ht_table_t));
table->size = size;
table->count = 0;
// create items table as pointer's table to nodes
table->items = (ht_node_t**) malloc(size * sizeof(ht_node_t*));
// set all items pointers to NULL
for(int i=0; i<size; ++i)
table->items[i] = NULL;
// return created table
return table;
}
/**
* create items table index from given key
* @param key key value
* @return index of items table between 0 and table size-1
*/
unsigned int ht_hash(unsigned int maxSize, unsigned char *key)
{
unsigned int hashedKey = 0;
// create hash from key
for(; *key != '0円'; ++key)
hashedKey = (hashedKey * 19 + *key) % maxSize;
// return created index
return hashedKey;
}
/**
* inset new element to hash table by given key
* @param table pointer to created hash table
* @param key key for value to insert
* @param value element to be inserted
* @param size size of element in bytes
* @return 1: inserted successfully
* 0: error occured while inserting value
*/
unsigned char ht_insert(ht_table_t* table, unsigned char *key, void *value, size_t size)
{
// check table and key are not NULL
if(!table || !key) return 0;
// index from key
unsigned int index;
// check whether the element with associated key already in table and get generated index
ht_node_t *newNode = ht_find_node(table, key, &index, NULL);
if(newNode)
{
// reallocate memory for new content value
newNode->value = (void*) realloc(newNode->value, size);
// copy value to allocated node value memory
memcpy(newNode->value, value, size);
}
else
{
// create new node
newNode = (ht_node_t*) malloc(sizeof(ht_node_t));
// create node key and copy it's value
newNode->key = (char*) malloc(strlen(key) + 1);
strcpy(newNode->key, key);
// create node value memory
newNode->value = (void*) malloc(size);
// copy value to allocated node value memory
memcpy(newNode->value, value, size);
// if index position is not empty link nodes
if(table->items[index] != NULL)
newNode->next = table->items[index];
else
newNode->next = NULL;
// add created node to items table
table->items[index] = newNode;
// increment table count
++table->count;
}
return 1;
}
/**
* find associated node to given key
* @param table pointer to hash table
* @param key key associated to the node
* @param index index of found node in items table
* @param prev pointer to previous node
* @return node pointer if found or NULL otherwise
*/
ht_node_t* ht_find_node(ht_table_t* table, unsigned char *key, unsigned int *index, ht_node_t **prev)
{
// get index for given key
unsigned int _index = ht_hash(table->size, key);
if(index)
*index = _index;
// get head items at index position
ht_node_t* currentNode = table->items[_index];
ht_node_t* _prev = NULL;
// search in linked array nodes until a NULL pointer
while(currentNode)
{
if(strcmp(currentNode->key, key) == 0)
break;
// go to next linked node
_prev = currentNode;
currentNode = currentNode->next;
}
if(currentNode)
{
if(prev)
*prev = _prev;
// return found node
return currentNode;
}
// value associated with the key not found
return NULL;
}
/**
* find value in hash table by given key
* @param table pointer to hash table
* @param key key of the value searching
*/
void* ht_find(ht_table_t* table, unsigned char *key)
{
// check table and key are not NULL
if(!table || !key) return NULL;
// get associated node to key
ht_node_t *currentNode = ht_find_node(table, key, NULL, NULL);
// node not found
if(!currentNode) return NULL;
// return node's value
return currentNode->value;
}
/**
* delete value by given key
* @param table pointer to hash table
* @param key value's key to delete
* @return 1: deleted successfully
* 0: error occured while deleting the value
*/
unsigned char ht_delete(ht_table_t* table, unsigned char *key)
{
// check table and key are not NULL
if(!table || !key) return 0;
ht_node_t *prevNode;
unsigned int index;
// get associated node to key
ht_node_t* currentNode = ht_find_node(table, key, &index, &prevNode);
// node not found
if(!currentNode) return 0;
// set previous node next element to current node next element
if(currentNode->next != NULL)
{
if(prevNode != NULL)
prevNode->next = currentNode->next;
else
table->items[index] = currentNode->next;
}
else if(prevNode == NULL)
table->items[index] = NULL;
// delete node
free(currentNode->key);
free(currentNode->value);
free(currentNode);
// decrement table count
--table->count;
return 1;
}
/**
* delete all table items
* @param table pointer to hash table
*/
unsigned char ht_delete_all(ht_table_t* table)
{
// check table is not NULL
if(!table) return 0;
// delete table items if not empty
if(table->count != 0)
{
ht_node_t *currentNode = NULL, *nextNode = NULL;
for (int i=0; i<table->size; ++i)
{
// get next node
nextNode = table->items[i];
while(nextNode)
{
currentNode = nextNode;
nextNode = currentNode->next;
// free memory allocated by current node
free(currentNode->key);
free(currentNode->value);
free(currentNode);
}
table->items[i] = NULL;
}
// set table count to 0
table->count = 0;
}
}
/**
* free allocated memory for given hash table
* @param table pointer to hash table
*/
unsigned char ht_free(ht_table_t* table)
{
// check table is not NULL
if(!table) return 0;
// delete table items if not empty
if(table->count != 0)
ht_delete_all(table);
// free table parts
free(table->items);
free(table);
return 1;
}
1 Answer 1
I see a lot of thought went into this code.
License
As someone looking for a hash-table implementation, I would consider a clear MIT license an important point towards your implementation.
Interface
This is well done, with comments where they are most useful. As a user looking at your interface, I have some questions.
return | function | comments |
---|---|---|
ht_table_t* | ht_create | int size should be an unsigned type like size_t |
unsigned int | ht_hash | the hash value should be transparent, return success, (as int or boolean , depending on the version of C); maxSize is undocumented and what is that? (Ed: I see now, this is the hash function; consider a static function; it is internal to your implementation.) |
unsigned char | ht_insert | int or boolean ; don't say what happens on collision |
ht_node_t* | ht_find_node | how do I deal with a ht_node_t ? I assume index and prev are set, but I do not care about the internal representation |
void* | ht_find | good name, documentation |
unsigned char | ht_delete | int or boolean |
unsigned char | ht_delete_all | what happens to the memory? this causes a compile error; should be void |
unsigned char | ht_free | what does the return mean? |
I suggest a few helper static
functions, internal to the implementation (private), and have the public functions return less.
Code
I see "HashTable.h", but in code, it's "hashTable.h". "@Copyright" doesn't fit with the other, lower-case tags. These case-inconsistencies may be bugs.
In ht_create
, (ht_table_t*)malloc(sizeof(ht_table_t))
could be malloc(sizeof *table)
. The cast is confusing, except where you want to port to C++
, (as you seem to here.) If you want to specify a totally opaque type, then you should declare it in header as something like struct ht_table_t;
and leave the implementation to the C file. You set all pointers to null appropriately.
You don't check that memory allocations work, so you open yourself up to undefined behaviour; practically, which could be a segfault or something worse if the memory allocation fails. Because the parameter for the requested memory size comes from outside your module, and is something you don't control, I would be especially careful about checking.
ht_hash
should be private, static
. A lot of unnecessary work could be saved by taking the remainder outside the loop.
In ht_insert
, you trust users a lot to take the size accurately, every time.
You accept unsigned char *key
, but this should be char *
for consistency, and library functions depend on it.
Three memory allocations, where one will do, slows down your code. However, be very careful about aligning strings.
-
1\$\begingroup\$ "you open yourself up to a segfault" - if lucky! This is Undefined Behaviour, so anything can happen (including appearing to work when under test, then failing catastrophically in real use!). \$\endgroup\$Toby Speight– Toby Speight2022年03月03日 08:12:40 +00:00Commented Mar 3, 2022 at 8:12
-
\$\begingroup\$ @TobySpeight That is a very good point. I've edited my paragraph, thanks. \$\endgroup\$Neil– Neil2022年03月03日 19:55:05 +00:00Commented Mar 3, 2022 at 19:55
unsigned char
? \$\endgroup\$