I write in C for several projects (e.g. learning / teaching, coding competitions, data processing) and frequently need arrays (e.g. strings) with fast appending / concatenation / reversing.
I've isolated the code below from a project. For simplicity, I assume all arguments are valid, including no pointer aliasing (for each function, no two arguments point to overlapping blocks of memory). I use a doubling strategy on reallocation.
I understand that vector
and string
from C++ likely
have superior performance / features - this is for settings where a simple C solution is more accessible or
easier to modify for the specific context of the problem.
stringi.h
#include <assert.h>
// contiguous dynamic array for fixed-size type T
// note: v[] is not terminated (no '0円')
typedef struct {
char *v; // T *v (testing: T = char)
size_t count; // count <= capacity
size_t capacity; // sizeof(v) / sizeof(T)
} stringi_raw;
// safe non-method mutations:
// 1. modifying v[i] for indices i
// e.g. v[i] = 'a'; ++v[i];
// 2. if n <= count, assigning n to count will keep
// the first n elements; beware of underflow!
// e.g. while (count > 0) --count;
// maximum capacity: capacity <= CAP_MAX
// practical: CAP_MAX = SIZE_MAX / sizeof(T)
static const size_t CAP_MAX = 987654321;
// pointer to a stringi_raw
typedef stringi_raw *stringi;
// initializes a stringi with 0 elements
// to be deallocated by stringi_free
stringi stringi_init();
// frees memory used by target from the heap
void stringi_free(stringi target);
// allow n items to be stored without realloc
// note: assumes n <= CAP_MAX
void stringi_reserve(stringi target, size_t n);
// appends c to the end of target
// note: takes O(1) amortized time
void stringi_put(stringi target, char c);
// appends the contents of src to dst
void stringi_cat(stringi dst, const stringi src);
// reverses target->v
void stringi_reverse(stringi target);
// aka "reverse(src); cat(dst, src); reverse(src)"
void stringi_cat_rev(stringi dst, const stringi src);
stringi.c
#include <stdlib.h>
#include "stringi.h"
stringi stringi_init()
{
stringi target = malloc(sizeof(stringi_raw));
assert(target != NULL);
target->v = NULL; // realloc(NULL, x) = malloc(x)
target->count = 0;
target->capacity = 0;
return target;
}
void stringi_free(stringi target)
{
free(target->v); // free(NULL) is safe
free(target);
}
// sets the capacity of target to cap
// note: assumes count <= cap <= CAP_MAX
void stringi_set_cap(stringi target, size_t cap)
{
void *v = realloc(target->v, cap * sizeof(char));
assert(v != NULL);
target->v = v;
target->capacity = cap;
}
// get the next capacity from cap
size_t double_or_max(size_t cap)
{
if (cap > CAP_MAX / 2)
return CAP_MAX;
return 2 * cap;
}
void stringi_reserve(stringi target, size_t n)
{
size_t cap = target->capacity;
if (n > cap)
{
if (cap == 0)
cap = 1;
else
{
cap = double_or_max(cap);
while (n > cap)
cap = double_or_max(cap);
}
stringi_set_cap(target, cap);
}
}
void stringi_put(stringi target, char c)
{
size_t n = target->count;
assert(n < CAP_MAX);
size_t n_fin = n + 1;
stringi_reserve(target, n_fin);
target->v[n] = c;
target->count = n_fin;
}
void stringi_cat(stringi dst, const stringi src)
{
size_t n_d = dst->count;
size_t n_s = src->count;
assert(n_d <= CAP_MAX - n_s);
size_t n_fin = n_d + n_s;
stringi_reserve(dst, n_fin);
for (size_t i = 0; i < n_s; ++i)
dst->v[n_d + i] = src->v[i];
dst->count = n_fin;
}
// if min <= max, reverse v[min] to v[max] inclusive
void chars_reverse(char *v, size_t min, size_t max)
{
while (min < max)
{
char temp = v[min];
v[min] = v[max];
v[max] = temp;
++min;
--max;
}
}
void stringi_reverse(stringi target)
{
// avoid underflow when count = 0
size_t count = target->count;
if (count > 0)
chars_reverse(target->v, 0, count - 1);
}
void stringi_cat_rev(stringi dst, const stringi src)
{
size_t n_d = dst->count;
size_t n_s = src->count;
assert(n_d <= CAP_MAX - n_s);
size_t n_fin = n_d + n_s;
stringi_reserve(dst, n_fin);
for (size_t i = 0; i < n_s; ++i)
dst->v[n_fin - 1 - i] = src->v[i];
dst->count = n_fin;
}
Tests
#include <stdio.h>
#include <limits.h>
#include <string.h>
#include <time.h>
#include "stringi.h"
void stringi_remove(stringi *target)
{
stringi_free(*target);
*target = NULL;
}
void stringi_print(stringi target)
{
printf("'%.*s' (%zu/%zu)\n",
(unsigned int) target->count, target->v,
target->count, target->capacity);
}
void stringi_validate(stringi target, char *expected)
{
size_t n = strlen(expected);
assert(n == target->count);
assert(0 == strncmp(expected, target->v, n));
}
int diff_to_ms(clock_t diff)
{
return diff * 1000 / CLOCKS_PER_SEC;
}
clock_t time_putchar(stringi target)
{
clock_t start = clock();
for (size_t i = 0; i < CAP_MAX; ++i)
stringi_put(target, 'c');
return clock() - start;
}
int main()
{
assert(CAP_MAX >= 1);
assert(CAP_MAX <= SIZE_MAX / sizeof(char));
stringi s1 = stringi_init();
stringi_validate(s1, "");
printf("create: s1 = %p\n", s1);
printf("\nexpanding s1:\n");
stringi_reverse(s1);
stringi_validate(s1, "");
for (int i = 0; i < 13; ++i)
{
stringi_print(s1);
stringi_put(s1, 'a' + i);
}
stringi_validate(s1, "abcdefghijklm");
stringi_print(s1);
stringi s2 = stringi_init();
stringi_validate(s2, "");
printf("\ncreate: s2 = %p\n", s2);
for (int i = 13; i < 26; ++i)
stringi_put(s2, 'a' + i);
stringi_validate(s2, "nopqrstuvwxyz");
stringi_cat(s1, s2);
stringi_validate(s1,
"abcdefghijklmnopqrstuvwxyz");
stringi_reverse(s1);
stringi_validate(s1,
"zyxwvutsrqponmlkjihgfedcba");
s2->count = 2;
stringi_validate(s2, "no");
stringi_cat(s1, s2);
stringi_validate(s1,
"zyxwvutsrqponmlkjihgfedcbano");
stringi_cat_rev(s1, s2);
stringi_validate(s1,
"zyxwvutsrqponmlkjihgfedcbanoon");
stringi_remove(&s1);
printf("remove: s1 = %p\n", s1);
stringi_remove(&s2);
printf("remove: s2 = %p\n", s2);
s1 = stringi_init();
stringi_validate(s1, "");
printf("create: s1 = %p\n", s1);
s2 = stringi_init();
stringi_validate(s2, "");
printf("create: s2 = %p\n", s2);
int diff1 = diff_to_ms(time_putchar(s1));
printf("\ns1 += CAP_MAX * 'c': %d ms\n", diff1);
stringi_cat(s1, s2);
// expect error: exceeding CAP_MAX
// stringi_put(s1, 'c');
s1->count = 0;
stringi_validate(s1, "");
printf("clear s1:\n");
stringi_print(s1);
stringi_remove(&s1);
stringi_reserve(s2, CAP_MAX);
printf("\nreserve for s2: CAP_MAX\n");
int diff2 = diff_to_ms(time_putchar(s2));
printf("s2 += CAP_MAX * 'c': %d ms\n", diff2);
s2->count = 0;
stringi_validate(s2, "");
printf("clear s2:\n");
stringi_print(s2);
stringi_remove(&s2);
return 0;
}
Output
create: s1 = 0000015E9D66F050
expanding s1:
'' (0/0)
'a' (1/1)
'ab' (2/2)
'abc' (3/4)
'abcd' (4/4)
'abcde' (5/8)
'abcdef' (6/8)
'abcdefg' (7/8)
'abcdefgh' (8/8)
'abcdefghi' (9/16)
'abcdefghij' (10/16)
'abcdefghijk' (11/16)
'abcdefghijkl' (12/16)
'abcdefghijklm' (13/16)
create: s2 = 0000015E9D66EEB0
remove: s1 =わ 0000000000000000
remove: s2 =わ 0000000000000000
create: s1 = 0000015E9D66EF90
create: s2 = 0000015E9D66F110
s1 += CAP_MAX * 'c': 1120 ms
clear s1:
'' (0/987654321)
reserve for s2: CAP_MAX
s2 += CAP_MAX * 'c': 931 ms
clear s2:
'' (0/987654321)
I would appreciate any thoughts on performance, readability, or modifiability.
3 Answers 3
I don't like this typedef:
// pointer to a stringi_raw typedef stringi_raw *stringi;
In C, it's important to know whether any value is a pointer or a value object. Obscuring this distinction is harmful to my understanding.
This assert()
is unfounded:
stringi target = malloc(sizeof(stringi_raw)); assert(target != NULL);
Assertions exist to tell us something that's true by design. However, we know that malloc()
can return a null pointer, and that's something that we need to handle - even (or especially) in production builds when automatic checking of our assertions is disabled by setting NDEBUG
. If our assertion is untrue, we proceed straight into undefined behaviour, and that's not something we should inflict on our users.
stringi_free()
is safe when passed a string with no storage, but (unlike free()
), it can't be passed a null pointer. We could document that its target
argument must be valid, but I think it's more helpful to users to just deal with null pointers:
void stringi_free(stringi target)
{
if (!target) { return; }
⋮
}
stringi.h
includes <assert.h>
. But that's not needed here - don't impose that cost on every translation unit that includes stringi.h
.
Conversely, it is invalid if there isn't a definition of size_t
in scope before it's included.
If stringi.h
is included multiple times in a translation unit, I think we get errors from redefining stringi_raw
. And an include guard to avoid that problem.
In stringi.c
, we have
#include <stdlib.h> #include "stringi.h"
This explains why we didn't spot the need for stringi.h
to have size_t
in scope. If we include our header first, then that gives us confidence that it's complete and not dependent on any prior declarations.
The tests demonstrate the functionality, but always return a success status. Better tests are self-checking, returning failure status if any of the operations don't produce the expected result.
-
2\$\begingroup\$ Good answer. Note that
stringi_free()
assumes that the pointer argument is not null, so it is coupled to the behaviors of theassert()
s aftermalloc()
andrealloc()
. \$\endgroup\$Nayuki– Nayuki2024年10月03日 16:29:56 +00:00Commented Oct 3, 2024 at 16:29 -
2\$\begingroup\$ Appreciate the answer. I will likely alter the design so that it returns NULL for failed
stringi_init()
and int error codes for failed extending, leaving theasserts
to the users. This would also make it easier to test failures by assert. And the order of includes is a great catch. \$\endgroup\$Justin Chang– Justin Chang2024年10月03日 17:03:54 +00:00Commented Oct 3, 2024 at 17:03 -
1\$\begingroup\$ Thanks @Nayuki - I've added a note about
stringi_free()
. \$\endgroup\$Toby Speight– Toby Speight2024年10月10日 07:15:43 +00:00Commented Oct 10, 2024 at 7:15
A few small items:
stringi stringi_init()
: In C, an empty parameter list means any number of arguments, not zero arguments. Change this tostringi stringi_init(void)
. (This is not necessary in C++ or starting in C23.)void chars_reverse(char *v, size_t min, size_t max)
: This function is only used within stringi.c, so make it "private" by addingstatic
in front, like:static void chars_reverse(...)
.void stringi_cat(...)
: Replace the for-loop withmemcpy()
:memcpy(dst->v + n_d, src->v, n_s);
void stringi_reserve(...)
: Use a do-while loop:do cap = double_or_max(cap); while (n > cap);
It seems like the canonical definition of
size_t
exists instddef.h
.
-
\$\begingroup\$ Appreciate it! The do while is a lot cleaner. \$\endgroup\$Justin Chang– Justin Chang2024年10月03日 17:55:07 +00:00Commented Oct 3, 2024 at 17:55
Review
You've named a few possibilities, but, really, where do you think you will actually use these functions (as a group)?
Every C programmer knows that C strings end with NUL terminators. These don't. These represent a reversion to Pascal conventions (each 'string' was prefixed with a hidden byte count only the compiler and Pascal libraries knew about.) C is not Pascal, and stepping away from this precept voids the utility of many tried and tested C Standard Library functions.
Example:
printf( "%s\n", "foobar" );
becomes
printf( "%.*s\n", ptr->len, (char*)ptr->val );
An improvement? Workable??
And puts()
is no longer useful, at all!
In C, a typical application involving lots of strings might be a "dictionary" whose individual words are accessible via an array of pointers...
With a modern compiler, sizeof( char * )
is typically 8 bytes.
The per-entry overhead of this proposal is (likely) 24 bytes!
I have a small English wordlist (~8000 entries of varying length). Running a quick test on that sample, a conventional array of pointers into a block of ragged length C strings required about 90,000 bytes of storage. Summing 24 bytes of overhead with the 'rounded up' allocation requirements for each of these 'Pascal strings' was twice as much... In blocks of powers of 2, the requirement is always the worst of its cohort. This is not good.
Plus, there is no demonstrated awareness of the overheads of malloc()
adding to the overheads of the "string fragments" represented by this code. Where's the advantage?
reVerSe vs reSerVe
Sorry, but I'm annoyed that these two different operations, with their too-similar spellings, have been "packaged-up" together.
How useful is reverse()
, really?
It's such a tiny & simple function (unrelated to memory allocation) that I'd feel better if it was not present in this collection.
Various SO questions, in the past, used "reverse in place" that took first
and last
parameters to reverse substrings within a given string. This had more utility, imo.
Summary
Without having made careful evaluation of every line of code, it looks like most of this package is nicely laid-out, readable and probably does what it needs to do.
However... Looking back over many years of programming, I cannot recall ever needing the functionality of stringi_catrev()
. (Where is its partner _prfrev()
that reverses the first string before appending the second string to it? Wouldn't the aforementioned (simple) 'reverse substring' be able to replace some of this?)
Most of these functions are little more than 'aliases'; wrappers for conventional C functions familiar to all who've already learned the language.
Explore related questions
See similar questions with these tags.
assert
is wrong. In the functionstringi_set_cap
you have the comment "assumes count <= cap <= CAP_MAX". That assumption should be checked with an assert. That is exactly what asserts are for. It'll be there when debugging, so the user can see when they call your functions in the wrong way, but won't be there in production, to not slow down the function. \$\endgroup\$realloc
is far superior to C++new
/delete
which omits any way to grow an existing allocation. Of course you need link-time optimization to inline across C source files to reduce per-push overhead since you put all your tiny functions in a separate.c
instead of directly in the.h
withstatic inline
(orinline
plus a.c
withextern inline
instantiation). \$\endgroup\$std::vector
for 7 billionpush_back
operations. The code above takes56176 ms
without optimizations and18947 ms
with optimizations, whilestd::vector
takes485778 ms
without optimizations and10968 ms
with optimizations. Unfortunately, the optimized executable for my code appears to be causing a ton of branch mispredictions (I have no idea what magic causesstd::vector
to avoid this). \$\endgroup\$-flto
orgcc -fwhole-program
to allow cross-file inlining from yourstringi.c
, if your benchmark was in a different file? Can you link your benchmark code, e.g. on godbolt.org (with compiler version/options you actually used) so I can see what you did and maybe profile it on my own Skylake system? Also what CPU? \$\endgroup\$% 64
every iteration, which costs amovabs r64, imm64
and a 64-bitmul
and some shift / LEA for each byte, which is maybe a similar amount of overhead to the bookkeeping to check if realloc is needed. I'm seeing negligible branch mispredicts withperf stat
on Skylake, miss rate is 0.00%. IPC is 3.41 instructions per clock. It's not getting optimized away despite not doing anything with the results except read the count, at least not by GCC. Clang can be more aggressive about optimizing away alloc/free even for code that writes the to buffer. \$\endgroup\$