I've made an associative array data structure (more details in the code comments below). What I'm interested in getting a critique on is the usage of double pointers.
- Are they necessary here?
- Am I allocating and freeing the memory correctly?
assoc_array.h
#ifndef __ASSOC_ARRAY_H__
#define __ASSOC_ARRAY_H__
/*
* A data structure to map characters
* to integer values.
*
* The value always starts at 0, and
* can be incremented from there.
*
* There's a space/speed trade-off based on
* the `size` provided. A character, `c`, will be
* inserted according to `c % size`. When more
* than one character maps to the same "bucket",
* a linear search is used to differentiate between
* those characters.
*
*/
typedef struct AssocArray AssocArray;
/*
* Create an array of N (size) buckets.
*/
AssocArray *aaAlloc(size_t size);
void aaFree(AssocArray *assoc_array);
/*
* Increment or set to 0 for the given character.
* Return -1 if an error occurs.
*/
int aaIncValue(AssocArray *assoc_array, char key);
#endif
assoc_array.c
#include <stdbool.h>
#include <stdlib.h>
#include "assoc_array.h"
typedef struct AssocArrayItem {
char key;
int value;
} AssocArrayItem;
typedef struct AssocArrayBucket {
size_t size;
AssocArrayItem **items;
} AssocArrayBucket;
typedef struct AssocArray {
size_t size;
AssocArrayBucket **buckets;
} AssocArray;
AssocArray *aaAlloc(size_t size) {
AssocArray *aa = malloc(sizeof(AssocArray));
aa->size = size;
aa->buckets = calloc(aa->size, sizeof(AssocArrayBucket *));
for (int i = 0; i < aa->size; i++) {
aa->buckets[i] = malloc(sizeof(AssocArrayBucket));
aa->buckets[i]->size = 0;
aa->buckets[i]->items = NULL;
}
return aa;
}
void aaFree(AssocArray *assoc_array) {
for (int i = 0; i < assoc_array->size; i++) {
size_t bucket_size = assoc_array->buckets[i]->size;
for (int j = 0; j < bucket_size; j++) {
if (assoc_array->buckets[i] != NULL &&
assoc_array->buckets[i]->items[j] != NULL) {
free(assoc_array->buckets[i]->items[j]);
}
}
if (assoc_array->buckets[i] != NULL) {
if (assoc_array->buckets[i]->items != NULL) {
free(assoc_array->buckets[i]->items);
}
free(assoc_array->buckets[i]);
}
}
if (assoc_array->buckets != NULL) {
free(assoc_array->buckets);
}
if (assoc_array != NULL) {
free(assoc_array);
}
}
int aaIncValue(AssocArray *assoc_array, char key) {
unsigned int idx = key % assoc_array->size;
AssocArrayBucket *bucket = assoc_array->buckets[idx];
if (bucket == NULL) {
return -1;
}
int incremented_value;
if (bucket->size == 0) {
bucket->size = 1;
bucket->items = calloc(bucket->size, sizeof(AssocArrayItem *));
bucket->items[0] = malloc(sizeof(AssocArrayItem));
bucket->items[0]->key = key;
bucket->items[0]->value = 1;
incremented_value = bucket->items[0]->value;
} else {
bool exists = false;
for (int i = 0; i < bucket->size; i++) {
if (bucket->items[i]->key == key) {
exists = true;
bucket->items[i]->value++;
incremented_value = bucket->items[i]->value;
break;
}
}
if (exists == false) {
bucket->items = realloc(bucket->items, bucket->size + 1);
bucket->items[bucket->size] = malloc(sizeof(AssocArrayItem));
bucket->items[bucket->size]->key = key;
bucket->items[bucket->size]->value = 1;
incremented_value = bucket->items[bucket->size]->value;
bucket->size++;
}
}
return incremented_value;
}
assoc_array_test.c
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include "assoc_array.h"
void printPass() {
if (isatty(STDOUT_FILENO)) {
printf("\e[32mPASS\e[0m\n");
} else {
printf("PASS\n");
}
}
void test1() {
printf("[TEST] Characters that end up in the same bucket inc'd correctly\t");
AssocArray *aa = aaAlloc(26);
// 'm' % 26 == 5
// 'S' % 26 == 5
assert(aaIncValue(aa, 'm') == 1);
assert(aaIncValue(aa, 'm') == 2);
assert(aaIncValue(aa, 'S') == 1);
assert(aaIncValue(aa, 'm') == 3);
assert(aaIncValue(aa, 'S') == 2);
aaFree(aa);
printPass();
}
int main() {
printf("\n");
test1();
return 0;
}
Compiled with:
clang -g -Wall assoc_array.c assoc_array_test.c -o assoc_array_test
1 Answer 1
Allocating.
The functions of
*alloc
family may fail. Always test their return values.bucket->items = realloc(bucket->items, bucket->size + 1);
is plain wrong. It allocates onlybucket->size + 1
bytes. You need(bucket->size + 1) * sizeof(AssocArrayItem *)
of them.
Freeing.
A test for
assoc_array != NULL
happens too late. A test forassoc_array->buckets != NULL
is also too late. You shall test before use, not after.Using double pointers is not justified.
aaAlloc
allocates that many pointers to buckets, and also that many buckets. It is a pure waste of space. Similarly, there is no reason to separately allocate pointers to items, and items themselves.aaIncValue
looks clumsy. There is no need to single out the case ofbucket->size == 0
. What is important is whether the key is found or not, andbucket->size == 0
guarantees that the key is not found. So, considerAssocArrayItem * item = aaFindItemInBucket(bucket, key); if (item == NULL) { bucket = extend_bucket(bucket); item = bucket[bucket->size - 1]; item->key = key; } item->value += 1;
Also, it looks like a misnomer (should be
aaPut
oraaInsert
).As a side note,
size
is not a best name.Considern_items
andn_buckets
-
\$\begingroup\$ Thank you, appreciate your feedback. I knew it felt odd doing so much allocation. I'm excited to update it soon. Nice catch on the realloc too. \$\endgroup\$denvaar– denvaar2024年02月23日 23:29:46 +00:00Commented Feb 23, 2024 at 23:29
-
\$\begingroup\$ To anyone interested, I've refactored the code, which you can find here gist.github.com/denvaar/baf6f6f75bff61f5d18442ba656aaccc \$\endgroup\$denvaar– denvaar2024年02月25日 17:03:32 +00:00Commented Feb 25, 2024 at 17:03