I wrote a dynamic array implementation in ISO C11 using void pointers. Everything I've implemented works--at least in all my tests--on a 64-bit machine. It has some vague type-checking and resizes automatically. However, I am not a C programmer--not yet. I've only been working with it for about five months, and I certainly do not know all the ins and outs of pointer manipulation. However, I've run it through UBSAN, ASAN, and Valgrind--nothing violates any standards, and all memory is properly cleared when the type is used properly.
My goal for this post is to identify pieces within the implementation that can be improved in processing time/memory consumption, cross-compatibility, or reliability. The program is as follows:
// DArray.h
// These are defined in a secondary file, Include/Master.h, in the
// source code.
#include <assert.h> // IWYU pragma: keep
#include <errno.h> // IWYU pragma: keep
#include <stdbool.h> // IWYU pragma: keep
#include <stdio.h> // IWYU pragma: keep
#include <stdlib.h> // IWYU pragma: keep
#include <string.h> // IWYU pragma: keep
#include <unistd.h> // IWYU pragma: keep
typedef int32_t i32; // +/- 2,147,483,647
typedef int16_t i16; // +/- 32,767
typedef int8_t i8; // +/- 127
typedef uint32_t u32; // + 4,294,967,295
typedef uint16_t u16; // + 65,535
typedef uint8_t u8; // + 255
typedef uint32_t position_t; // position counter
typedef uint32_t length_t; // length counter
/**
* @brief My own, slightly less performant, slightly less clean, but
* slightly more descriptive assert function. It works about the same, but
* the printed output is much easier on the eyes.
*/
#define ASSERT(expr) \
((void)sizeof((expr) ? 1 : 0), __extension__({ \
if (!(expr)) \
{ \
fprintf(stderr, \
"\n\n033円[31mAssertion failed: %s.\nCaller: " \
"%s() @ %s on ln. %d.033円[0m\n\n\n", \
#expr, __func__, FILENAME, __LINE__); \
exit(EXIT_FAILURE); \
} \
}))
/**
* @brief A list of possible types that the dynamic array type can handle.
* The members of this list are basically all we really need, type wise, to
* use this type effectively, so I haven't bothered adding some kind of
* variable-inclusive-type system in. Too much work for little gain.
*/
typedef enum
{
/**
* @brief Unsigned 8-bit integer.
*/
unsigned8,
/**
* @brief Unsigned 16-bit integer.
*/
unsigned16,
/**
* @brief Unsigned 32-bit integer.
*/
unsigned32,
/**
* @brief Signed 8-bit integer.
*/
signed8,
/**
* @brief Signed 16-bit integer.
*/
signed16,
/**
* @brief Signed 32-bit integer.
*/
signed32,
/**
* @brief A string.
*/
string
} dynamic_array_type_t;
/**
* @brief A single node within a dynamic array. Its members are defined by
* the @ref dynamic_array_type_t enumerator.
*/
typedef union
{
/**
* @brief An unsigned 8-bit integer.
*/
u8 unsigned8;
/**
* @brief An unsigned 16-bit integer.
*/
u16 unsigned16;
/**
* @brief An unsigned 32-bit integer.
*/
u32 unsigned32;
/**
* @brief A signed 8-bit integer.
*/
i8 signed8;
/**
* @brief A signed 16-bit integer.
*/
i16 signed16;
/**
* @brief A signed 32-bit integer.
*/
i32 signed32;
/**
* @brief A raw c-string of variable length.
*/
char* string;
} dynamic_array_contents_node_t;
typedef struct
{
/**
* @brief An array of ambiguous information, all consecutive in memory.
*/
dynamic_array_contents_node_t* contents;
/**
* @brief The type of node this array is meant to store.
*/
const dynamic_array_type_t type;
/**
* @brief The size of the array, indexed from 1.
*/
u32 size;
/**
* @brief The number of occupied slots within the @ref contents array.
*/
u32 occupied;
} dynamic_array_t;
/**
* @brief Create a new dynamic array of type @param array_type and of size
* @param size. Note that the object returned from this function @b must be
* de-allocated via @ref DestroyDynamicArray.
* @param array_type The type of array. See @ref dynamic_array_type_t.
* @param size The initial size of the array. Try to estimate how big your
* array will be, to prevent unnecessary re-allocations.
* @return The requested array.
*/
INLINE dynamic_array_t CreateDynamicArray(dynamic_array_type_t array_type,
u32 size)
{
return (dynamic_array_t){
malloc(sizeof(dynamic_array_contents_node_t) * size), array_type,
size, 0};
}
/**
* @brief Destroy the given dynamic array.
* @param array The array to destroy.
*/
void DestroyDynamicArray(dynamic_array_t* array);
/**
* @brief Get the size of the given array, or the max amount of elements
* allowed within it before it must be reallocated.
* @param array The array to check.
* @return The size of the array.
*/
INLINE length_t GetArraySize(dynamic_array_t array) { return array.size; }
/**
* @brief Get the current amount of filled indexes within the given array.
* @param array The array to check,
* @return The amount of occupied indexes.
*/
INLINE length_t GetArrayOccupancy(dynamic_array_t array)
{
return array.occupied;
}
/**
* @brief Convert an ambiguous value (void pointer) into an array content
* node. This is basically just a switch statement, with some added copy
* functionality as to prevent data loss with strings.
* @param array_type The type of node we're creating.
* @param value The value to be converted.
* @return A brand-spanking-new array node union.
*/
dynamic_array_contents_node_t
CreateArrayNode(dynamic_array_type_t array_type, void* value);
/**
* @brief Append a node to the end of the given array. This is noted as
* "Internal" since it has a wrapper, @ref PushIntoArray, which converts an
* RValue to an LValue for the purpose of calling ease.
* @param array The array to append to.
* @param node The node to append.
*/
void InternalPushIntoArray(dynamic_array_t* array,
dynamic_array_contents_node_t node);
/**
* @brief A heler wrapper for the @ref InternalPushIntoArray function that
* allows the user to use literals when creating an array node via a void
* pointer.
*/
#define PushIntoArray(array, value) \
{ \
__typeof__(value) internal_value = value; \
InternalPushIntoArray( \
array, CreateArrayNode((array)->type, &internal_value)); \
}
/**
* @brief Insert a node into the beginning of the given array. This is
* noted as "Internal" since it has a wrapper, @ref PushIntoArray, which
* converts an RValue to an LValue for the purpose of calling ease.
* @param array The array to insert into.
* @param node The node to insert.
*/
void InternalPopIntoArray(dynamic_array_t* array,
dynamic_array_contents_node_t node);
/**
* @brief A heler wrapper for the @ref InternalPopIntoArray function
* that allows the user to use literals when creating an array node via a
* void pointer.
*/
#define PopIntoArray(array, value) \
{ \
__typeof__(value) internal_value = value; \
InternalPopIntoArray( \
array, CreateArrayNode((array)->type, &internal_value)); \
}
/**
* @brief Remove a value from an index in the given array. Note that this
* DOES NOT reallocate the array's size, just moves values, so the address
* taking up the variable being removed is not @b guaranteed to be
* overwritten until either a new addition to the array or the array's
* destruction. This effect is only noticable when removing the last index
* of the given array.
* @param array The array to remove from.
* @param index The index of the array to rip.
*/
void RipFromArray(dynamic_array_t* array, position_t index);
/**
* @brief Compare an abitrary value and an array node. Note that the
* behavior of this function is undefined if the type of both value and
* array are not the same.
* @param array_type The type of array we're checking.
* @param node The node to check.
* @param value The value to check against.
* @return true The values are the same.
* @return false The values are different.
*/
bool CompareArrayValues(dynamic_array_type_t array_type,
dynamic_array_contents_node_t node, void* value);
/**
* @brief Remove the first of a given value from the given array. It is
* up to the function caller to make certain the given value is of the
* type of the array.
* @param array The array to change.
* @param value The value to remove.
* @return true We found and removed the value.
* @return false We could not find the value.
*/
bool RemoveFirstFromArray(dynamic_array_t* array, void* value);
/**
* @brief Remove the last of a given value from the given array. It is up
* to the function caller to make certain the given value is of the type of
* the array.
* @param array The array to change.
* @param value The value to remove.
* @return true We found and removed the value.
* @return false We could not find the value.
*/
bool RemoveLastFromArray(dynamic_array_t* array, void* value);
/**
* @brief Remove all of a given value from the given array. It is up
* to the function caller to make certain the given value is of the type of
* the array.
* @param array The array to change.
* @param value The value to remove.
* @return true We found and removed the value.
* @return false We could not find the value.
*/
bool RemoveFromArray(dynamic_array_t* array, void* value);
// DArray.c
void DestroyDynamicArray(dynamic_array_t* array)
{
if (array->type == string)
{
for (position_t contents_index = 0;
contents_index < array->occupied; contents_index++)
free(array->contents[contents_index].string);
}
free(array->contents);
}
dynamic_array_contents_node_t
CreateArrayNode(dynamic_array_type_t array_type, void* value)
{
dynamic_array_contents_node_t node;
switch (array_type)
{
case unsigned8: node.unsigned8 = *(u8*)value; break;
case unsigned16: node.unsigned16 = *(u16*)value; break;
case unsigned32: node.unsigned32 = *(u32*)value; break;
case signed8: node.signed8 = *(i8*)value; break;
case signed16: node.signed16 = *(i16*)value; break;
case signed32: node.signed32 = *(i32*)value; break;
case string:
{
char* string = value;
node.string = malloc(strlen(string) + 1);
(void)strcpy(node.string, string);
break;
}
default:
LogError(unknown_generic_value); // Impossible unless
// something really
// fucky is going on.
}
return node;
}
void InternalPushIntoArray(dynamic_array_t* array,
dynamic_array_contents_node_t value)
{
array->occupied += 1;
if (array->occupied > array->size)
array->contents = realloc(array->contents,
sizeof(dynamic_array_contents_node_t) *
(array->size += 1));
array->contents[array->occupied - 1] = value;
}
void InternalPopIntoArray(dynamic_array_t* array,
dynamic_array_contents_node_t node)
{
array->occupied += 1;
if (array->occupied > array->size)
array->contents = realloc(array->contents,
sizeof(dynamic_array_contents_node_t) *
(array->size += 1));
for (position_t index = array->occupied - 1; index > 0; index--)
array->contents[index] = array->contents[index - 1];
array->contents[0] = node;
}
void RipFromArray(dynamic_array_t* array, position_t index)
{
ASSERT(array->occupied > index);
if (array->type == string) free(array->contents[index].string);
for (position_t removal_index = index;
removal_index < array->occupied - 1; removal_index++)
array->contents[removal_index] =
array->contents[removal_index + 1];
array->occupied -= 1;
}
bool CompareArrayValues(dynamic_array_type_t array_type,
dynamic_array_contents_node_t node, void* value)
{
switch (array_type)
{
case unsigned8: return node.unsigned8 == *(u8*)value;
case unsigned16: return node.unsigned16 == *(u16*)value;
case unsigned32: return node.unsigned32 == *(u32*)value;
case signed8: return node.signed8 == *(i8*)value;
case signed16: return node.signed16 == *(i16*)value;
case signed32: return node.signed32 == *(i32*)value;
case string: return strcmp(node.string, value) == 0;
default: LogError(unknown_generic_value);
}
}
bool RemoveFirstFromArray(dynamic_array_t* array, void* value)
{
for (position_t index = 0; index < array->occupied; index++)
{
if (CompareArrayValues(array->type, array->contents[index], value))
{
RipFromArray(array, index);
return true;
}
}
return false;
}
bool RemoveLastFromArray(dynamic_array_t* array, void* value)
{
for (position_t index = array->occupied - 1; index > 0; index--)
{
if (CompareArrayValues(array->type, array->contents[index], value))
{
RipFromArray(array, index);
return true;
}
}
return false;
}
bool RemoveFromArray(dynamic_array_t* array, void* value)
{
bool found_item = false;
for (position_t index = 0; index < array->occupied; index++)
{
if (CompareArrayValues(array->type, array->contents[index], value))
{
RipFromArray(array, index);
found_item = true;
index -= 1;
}
}
return found_item;
}
Edit: Note that the preprocessor macro FILENAME
is defined by my CMake build as the basename of the given file (i.e. DArray.c
).
2 Answers 2
Types
typedef int32_t i32;
makes it slightly faster to type the type name, but much harder for the experienced C programmer to read. i32
I have to go and look up, int32_t
I know what it is. Use the default names when you can.
In contrast, typedef uint32_t position_t;
is good. I like making types for specific purposes. However, C has size_t
for this. Use that instead.
So first you find int32_t
too long, but then you type
for (position_t removal_index = index;
removal_index < array->occupied - 1; removal_index++)
In an inner loop like this, you don't really need a long name like removal_index
. i
or ii
suffices. We all know that that is a loop counter.
It is OK to give descriptive names to things, but in general, the smaller the scope of a variable, the less descriptive the name needs to be. If a variable exists only within a 2-line loop, you don't need to do much to explain to the reader what the variable means. Something defined a few screenfuls earlier needs to be very clearly named to help the reader.
dynamic_array_contents_node_t
But the thing I really wanted to talk about is dynamic_array_contents_node_t
. On a 64-bit machine, this occupies 8 bytes (because of char*
). This is incredibly wasteful. Say I want my array to contain 1000 8-bit ints. This array is now 8000 bytes rather than 1000 bytes.
I don't know what your use case is for this array, but it is not at all necessary to over-allocate like this. Create a void*
array, and cast it to the right type before dereferencing.
Also, why do some functions like RemoveFromArray
take a void*
and not a dynamic_array_contents_node_t
? I guess it's easier to call RemoveFromArray(array, &10)
than specifically creating a dynamic_array_contents_node_t
. But is is terribly dangerous, because you need to know what type is stored in array
, and pass a pointer to the exact same type in: RemoveFromArray(array, &(uint8_t)10)
, otherwise it's not going to work. The "easy syntax" above was wrong! Forcing the user to pass in a dynamic_array_contents_node_t
would make it slightly harder to do wrong:
dynamic_array_contents_node_t v;
v.unsigned8 = 10;
RemoveFromArray(array, &v);
But either way, this is not at all type safe. C is a statically typed language, and all of this is just subverting that type safety.
So then we get to the use case: what could possibly require a dynamically typed array? Your code must still use values with specific types, so you should be able to use an array type with a specific type too, no? That would make the code simpler, and so much safer to use.
I am guessing the dynamic type is not really meant to be decided at runtime, but rather this is an attempt to avoid code duplication. You tried to write generic code with a language that doesn't support generics.
One solution to this would be to write some macros that generate the code for specific types for you. The macro would generate
dynamic_array_i32_t
,dynamic_array_u16_t
, etc. You would only define those you actually need in the current program. But macros do make the code harder to read, though the client code wouldn't be using any macros at all.Another solution is to just write one array for
int64_t
and one forstring
. The code for the two would be simpler than the single dynamically-typed array you have now, and just as efficient with memory (each element occupies 8 bytes anyway, so just store your 8-bit integers as 64-bit integers and be done with it).Another solution is to use a different language that supports generics.
I don't know what the right solution is because I don't know your use case.
API
InternalPushIntoArray
sounds like an internal function. Don't put it in the public interface (the header file). Keep it as a static function in the source file. CompareArrayValues
also looks like a support function, not sure if the user is meant to use it.
PopIntoArray
: pop means remove, but this function inserts a value. This is very confusing.
RipFromArray
: this should probably be Erase
or Delete
or something like that.
Where is the function to get values out of the array? All I can do is insert values and delete them, I cannot see what values are in there!
CreateDynamicArray
takes a u32
value for size
. Why not a length_t
? The data structure also uses u32
internally for size
and occupied
.
Bugs
You don't check the output of any malloc
or realloc
. These functions can fail. When they do, they return NULL. You need to check their output, and do something appropriate if it's NULL. Otherwise this is a bomb lying in wait to produce weird crashes in your production code a few years down the line, and it will be very hard to figure out what is causing it then.
DestroyDynamicArray
should, after calling free
, set the contents
pointer to NULL and size
and occupied
to 0. This should hopefully prevent its use after being freed. Your other functions should all guard against a 0 size and/or NULL pointer. Even if the guards are asserts
that only do anything in debug code. Anything you can do to help the user of the library (i.e. yourself) avoid mistakes is good.
-
1\$\begingroup\$ Thank you for such a detailed answer! Let's go down the line; the type definition issue was noted by another user, and I find myself agreeing. I'll change those within the program. But why would I use
size_t
instead ofposition_t
? To a layman such as myself,size
andposition
mean two different things, no? The name of loop variables was also noted by another user, and I had no idea that was good practice! I'll switch those over. Fordynamic_array_contents_node_t
, I had no idea that the size overhead was so massive, I'll fix that immediately! \$\endgroup\$Zenais– Zenais2024年07月20日 05:31:12 +00:00Commented Jul 20, 2024 at 5:31 -
1\$\begingroup\$ Along with that, though, I think the second solution you offered; "just write one array for int64_t and one for string" is far more pragmatic than the system I have in place now. I will implement that. As for the section labelled
API
, I had no idea thatstatic
was even an option here! I'll be using that solution now for sure (if I even have need for anyInternal
functions after I've implemented your fixes). The inconsistencies between data types used have already been fixed by me retroactively within the source code. Dynamic allocation error checks have since been fixed as well. :) \$\endgroup\$Zenais– Zenais2024年07月20日 05:34:26 +00:00Commented Jul 20, 2024 at 5:34 -
\$\begingroup\$ @Zenais Wonderful! I love to hear that my comments are received well. You can directly use
size_t
instead oflength_t
, as "size" is a common name for the length of an array. But I meant that you should usesize_t
instead ofuint32_t
to defineposition_t
, as that is the correct type for indexing into an array. On a 64-bit system,size_t
is a 64-bit unsigned integer, and it will always be able to address every element of the array, no matter how large the array is (it is defined as an integer of the same length as pointers). \$\endgroup\$Cris Luengo– Cris Luengo2024年07月20日 06:44:31 +00:00Commented Jul 20, 2024 at 6:44 -
\$\begingroup\$ Ah, okay! That makes sense, I'll switch it over now. \$\endgroup\$Zenais– Zenais2024年07月20日 06:47:54 +00:00Commented Jul 20, 2024 at 6:47
Stick to ISO C whenever possible:
In your custom ASSERT()
, you've used a compound statement expression, but that is only pertinent to one dialect of C, GNU C, and makes the code less portable. I also do not see the point of making ASSERT()
usable as an assert. It is only used once in the code, and that too as a statement.
I'd suggest replacing the extension with a simple do
-while
loop, or have ASSERT()
be a wrapper about an assert()
function.
FILENAME
too seems to be an extension. It can be replaced with __FILE__
, which expands to "The presumed name of the current source file (a character string literal)". If the problem is that it shows the full path, call something like basename()
on it.
-
\$\begingroup\$ Ah, FILENAME is a macro defined during my CMake build process as the name of the file without the full path. I figured it far more efficient to just have a literal prepared off the bat, rather than construct one. I should have noted that in the code, thank you. All of your other concerns (or, well, concern) are for sure valid, and I'll put them on my list for an edit pass on this interface. Thanks! \$\endgroup\$Zenais– Zenais2024年07月18日 06:15:45 +00:00Commented Jul 18, 2024 at 6:15
dynamic_array_t
intended to store elements of the same type or can the elements in one array be of different types? \$\endgroup\$DArray.h
-- Thei8
,i16
, etc. are all shortened versions of portable types found instdint.h
, and INLINE isstatic inline
. I did, however, forget to defineASSERT
, good catch. I'll edit that in now. As for the ignoredmalloc()
check, I completely forgot to do that. Adding it to my own code now. \$\endgroup\$typedef uint32_t u32;
because using 3 characters is shorter than using 8 (standard) characters as a datatype. On the other hand, there'svoid InternalPushIntoArray(dynamic_array_t* array, dynamic_array_contents_node_t value)
because... Why?? This code NEEDS more concision. As it is, it is very difficult to read and evaluate. \$\endgroup\$u32
, but what is the problem withInternalPushIntoArray
? Its name is perfectly descriptive, and the parameters passed to it make sense logically. \$\endgroup\$