I'm in the process of writing a Generic Container Library in C called CGCL. One of the generic containers I'm implementing is the vector container.
I'm writing this library to improve my knowledge of different data structures, improve my C coding skills, improve my skills with generic programming in C (as opposed to templates from C++), and also hopefully grow the library into something akin to the STL containers from C++ that all C developers can use when they need to use a common data structure (C doesn't have a standard data structure library).
I would like a review on the following (in this order of importance):
- Correctness
- Performance considerations (both memory and time improvements)
- Code style
For some background information, the library naming convention for the container functions is something like:
gcl_<container>_<operation>
where the containers themselves are named like this: gcl_<container>
.
For reference, I'll be including some of the utility headers that contain some of the typedefs and enums I use in the code below. The actual code for the vector is in cgcl_vector.h/.c however.
cgcl_util.h
#ifndef CGCL_UTIL_H
#define CGCL_UTIL_H
#include <stdbool.h>
typedef int (*GCLCompare)(const void *, const void *);
typedef bool(*GCLIsEqual)(const void *, const void *);
#endif
cgcl_errors.h
#ifndef CGCL_ERRORS_H
#define CGCL_ERRORS_H
typedef enum GCLError {
eNoErr = 0,
eInvalidArg,
eFailedAlloc,
eInvalidOperation,
eOutOfBoundsAccess
} GCLError;
#endif
cgcl_vector.h
#ifndef CGCL_VECTOR_H
#define CGCL_VECTOR_H
#include "cgcl_errors.h"
#include "cgcl_util.h"
#include <stddef.h>
#include <stdbool.h>
typedef struct gcl_vector___ gcl_vector;
gcl_vector *gcl_vector_init();
GCLError gcl_vector_push_back(gcl_vector *v, void *elem);
GCLError gcl_vector_insert(gcl_vector *v, void *elem, size_t pos);
GCLError gcl_vector_popback(gcl_vector *v);
GCLError gcl_vector_erase(gcl_vector *v, size_t pos);
GCLError gcl_vector_reserve(gcl_vector *v, size_t n);
void gcl_vector_clear(gcl_vector *v);
void *gcl_vector_find(const gcl_vector *v, const void *value, GCLIsEqual isEqual);
void gcl_vector_remove(gcl_vector *v, void *elem, GCLIsEqual isEqual);
GCLError gcl_vector_set(gcl_vector *v, void *elem, size_t pos);
void *gcl_vector_get(const gcl_vector *v, size_t pos);
bool gcl_vector_empty(const gcl_vector *v);
size_t gcl_vector_size(const gcl_vector *v);
size_t gcl_vector_capacity(const gcl_vector *v);
size_t gcl_vector_max_size(void);
void *gcl_vector_front(const gcl_vector *v);
void *gcl_vector_back(const gcl_vector *v);
void gcl_vector_destroy(gcl_vector *v);
void gcl_vector_foreach(gcl_vector *v, void *(*callback)(void *));
void gcl_vector_sort(gcl_vector *v, GCLCompare sortCompare);
#endif
cgcl_vector.c
#include "cgcl_vector.h"
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
struct gcl_vector___ {
void **data;
size_t size, capacity;
};
static const size_t INITIAL_VECTOR_CAPACITY = 100;
static const size_t MAX_VECTOR_SIZE = UINTMAX_MAX - 1;
gcl_vector *gcl_vector_init()
{
return calloc(1, sizeof(gcl_vector));
}
static GCLError gcl_vector_realloc(gcl_vector *v, size_t newElemCount)
{
void *newData = realloc(v->data, newElemCount * sizeof(*(v->data)));
if (!newData) {
return eFailedAlloc;
}
v->data = newData;
v->capacity = newElemCount;
return eNoErr;
}
static size_t gcl_vector_find_index(gcl_vector *v, void *elem, GCLIsEqual isEqual)
{
for (size_t i = 0; i < v->size; ++i) {
if (isEqual(elem, v->data[i])) {
return i;
}
}
return v->size + 1;
}
GCLError gcl_vector_push_back(gcl_vector *v, void *elem)
{
if (v->size >= gcl_vector_max_size()) {
return eInvalidOperation;
}
GCLError e = eNoErr;
if (v->size == v->capacity) {
const size_t newElemCount = v->capacity == 0 ? INITIAL_VECTOR_CAPACITY :
v->capacity * 2;
e = gcl_vector_realloc(v, newElemCount);
}
if (e == eNoErr) {
v->data[v->size++] = elem;
}
return e;
}
GCLError gcl_vector_insert(gcl_vector *v, void *elem, size_t pos)
{
if (v->size >= gcl_vector_max_size()) {
return eInvalidOperation;
}
GCLError e = eNoErr;
if (v->size == v->capacity) {
const size_t newElemCount = v->capacity == 0 ? INITIAL_VECTOR_CAPACITY :
v->capacity * 2;
e = gcl_vector_realloc(v, newElemCount);
}
if (e == eNoErr) {
memmove(&v->data[pos + 1], &v->data[pos],
(v->size - pos) * sizeof(*(v->data)));
v->data[pos] = elem;
++v->size;
}
return e;
}
GCLError gcl_vector_popback(gcl_vector *v)
{
if (gcl_vector_empty(v)) {
return eInvalidOperation;
}
v->data[--v->size] = NULL;
return eNoErr;
}
void *gcl_vector_find(const gcl_vector *v, const void *value, GCLIsEqual isEqual)
{
for (size_t i = 0; i < v->size; ++i) {
if (isEqual(value, v->data[i])) {
return v->data[i];
}
}
return NULL;
}
void gcl_vector_remove(gcl_vector *v, void *elem, GCLIsEqual isEqual)
{
const size_t foundIndex = gcl_vector_find_index(v, elem, isEqual);
if (foundIndex == v->size + 1) {
return;
}
memmove(&v->data[foundIndex], &v->data[foundIndex + 1],
(v->size - foundIndex) * sizeof(*(v->data)));
--v->size;
}
GCLError gcl_vector_erase(gcl_vector *v, size_t pos)
{
if (pos > v->size - 1) {
return eOutOfBoundsAccess;
}
for (size_t i = pos; i < v->size; ++i) {
v->data[i] = NULL;
}
v->size = pos;
return eNoErr;
}
GCLError gcl_vector_reserve(gcl_vector *v, size_t n)
{
return n > v->capacity ? gcl_vector_realloc(v, n) : eInvalidOperation;
}
GCLError gcl_vector_set(gcl_vector *v, void *elem, size_t pos)
{
if (pos > v->size - 1 || gcl_vector_empty(v)) {
return eOutOfBoundsAccess;
}
v->data[pos] = elem;
return eNoErr;
}
void *gcl_vector_get(const gcl_vector *v, size_t pos)
{
if (pos > v->size - 1 || gcl_vector_empty(v)) {
return NULL;
}
return v->data[pos];
}
void gcl_vector_clear(gcl_vector *v)
{
memset(v->data, 0, v->size * sizeof(*(v->data)));
v->size = 0;
}
bool gcl_vector_empty(const gcl_vector *v)
{
return v->size == 0;
}
size_t gcl_vector_size(const gcl_vector *v)
{
return v->size;
}
size_t gcl_vector_capacity(const gcl_vector *v)
{
return v->capacity;
}
size_t gcl_vector_max_size(void)
{
return MAX_VECTOR_SIZE;
}
void *gcl_vector_front(const gcl_vector *v)
{
if (gcl_vector_empty(v)) {
return NULL;
}
return *(v->data);
}
void *gcl_vector_back(const gcl_vector *v)
{
if (gcl_vector_empty(v)) {
return NULL;
}
return v->data[v->size - 1];
}
void gcl_vector_foreach(gcl_vector *v, void *(*callback)(void *))
{
for (size_t i = 0; i < v->size; ++i) {
callback(v->data[i]);
}
}
void gcl_vector_sort(gcl_vector *v, GCLCompare sortCompare)
{
qsort(v->data, v->size, sizeof(*(v->data)), sortCompare);
}
void gcl_vector_destroy(gcl_vector *v)
{
free(v->data);
free(v);
}
2 Answers 2
Correctness
Good use of
size_t
.Good use of
const
with various functionals likegcl_vector_empty(const gcl_vector *v)
Something of a correctness issue. Better clean-up and rather see symmetry with the "first" and "last" functions. I want incorrect re-usage of a free'd variable to cause problems as quickly as possible. Also like
free()
, be tolerant of aNULL
argument. Also: consider complementary function names rather thaninit
anddestroy
.gcl_vector *gcl_vector_init(void) { ... } gcl_vector *gcl_vector_destroy(gcl_vector *v) { if (v) { free(v->data); v->data = NULL; v->size = 0; free(v); } return NULL; }
Watch out for
0-1
which isSIZE_MAX
. Anywhere-
occurs is a potential error.GCLError gcl_vector_set(gcl_vector *v, void *elem, size_t pos) { // v->size could be 0 here. // if (pos > v->size - 1 || ... if (pos + 1 > v->size || ...
BTW:
pos + 1 > v->size
is a sufficient check. No need for|| gcl_vector_empty(v)
if (pos + 1 > v->size) { // out of range
Pedantic:
MAX_VECTOR_SIZE
not enforced withv->capacity * 2
const size_t newElemCount = v->capacity == 0 ? INITIAL_VECTOR_CAPACITY : v->capacity * 2;
Performance Considerations (both memory and time improvements)
- Good that the true initial vector size is 0. A popular usage of this type could easily have numerous zero-length vectors. Disagree with
INITIAL_VECTOR_CAPACITY = 100
as 100 seems a bit high, but that is a quibble.
Code Style
In the header files, sometimes the user does not has or does not care for access to the .c source code. Conveying information in the header is useful. Recommend using variable names
// typedef int (*GCLCompare)(const void *, const void *); typedef int (*GCLCompare)(const void *element_x, const void *element_y);
With enumeration, Found it useful to create a
N
value to help check the range.typedef enum GCLError { eNoErr = 0, eInvalidArg, ... eOutOfBoundsAccess, GCLError_N } GCLError; if (error < 0 || error >= GCLError_N) Handle_BadError(error);
Minor: Prefer explicit
void
. Need to double check if()
is equivalent. I think, as a declaration,foo()
andfoo(void)
are equivalent but not as a lone definition.// gcl_vector *gcl_vector_init(); gcl_vector *gcl_vector_init(void);
etc.
void gcl_vector_clear(gcl_vector *v)
is a good function. Something useful for the user.Consider a compare function that pass the context - re
errno_t qsort_s(void *base, rsize_t nmemb, rsize_t size, int (*compar)(const void *x, const void *y, void *context), void *context);`
I think
gcl_vector_remove()
should return a success/fail indication.A small sample usage, as a
#if 0 ... #endif
comment in thecgcl_vector.h
file goes a long way to helping folks understand the basics of this tool set.As mentioned by @vnp, A useful "right size" function is missing.
Maybe a complementary function:
bool gcl_vector_full(const gcl_vector *v);
to go withbool gcl_vector_empty();
Nullifying removed elements serve no purpose. You may safely drop
memset(v->data, 0, v->size * sizeof(*(v->data)));
fromclear
,v->data[--v->size] = NULL;
frompopback
, etc.It is unclear why
erase
doesn't callmemset
but erases elements one by one.On the same note,
clear
shall be expressed in terms oferase
:void gcl_vector_clear(gcl_vector *v) { gcl_vector_erase(v, 0); }
In
foreach
, the callback return value is ignored. I suppose declaring argument asvoid (*callback)(void *)
conveys this better.I would consider to compile bound check feature conditionally.
shrink_to_fit
is sorely missing.
Explore related questions
See similar questions with these tags.