I'm currently going through nand2tetris, mainly as an excuse to learn C via writing some non-trivial programs. As I've already done it in python, I've began to 'port' over some of the python features to c to make the rest of the course easier (N2T requires you to write an assembler, VM, compiler, etc.). This is the start of an attempt at a string type.
This string lib mostly lifts its (limited*) functionality from python strings, obviously there is not the same safety (I did consider making the struct String
opaque, but I've read that's generally considered bad practice).
Overall, the design goals are as follows:
- Strings should know their lengths (prevent out of bound errors)
- Strings should be immutable (if you want to add/remove characters, you create new string)
- API should be clear/obvious to use
- Discourage indexing directly (not quite an iterator but I've gone with java style charAt)
- Stay true to N2T style i.e. reinvent the wheel where possible/reasonable
I'm not going to direct the review too much, please just point out things that are horrible, "smell funny" and/or could generally be improved.
mystring.h
/* String library headers */
#ifndef MYSTRING
#define MYSTRING
#include <stdlib.h>
/* simple struct for managing strings */
typedef struct {
size_t length;
char *s;
} String;
/* create new string */
String newstr(char *);
/* concatenate two strings */
String concat(const String *, const String *);
/* concatenate a string with a character array (i.e. c string) */
String concatRaw(const String *, char *);
/*
prefix matching - returns 1 if there is a prefix match, else return 0
prefix can one or more characters i.e. 'hello' is a prefix of 'hello world'.
*/
int startswith(const String *, char *prefix);
/* get char at the given index - if out of bounds, log error and exit */
char charat(const String *, size_t idx);
void freestr(String *);
#endif
mystring.c
/* An attempt at as C string library */
#include <stdio.h>
#include "mystring.h"
#define OOM "-------- OUT OF MEMORY ---------"
/* Log error before exiting */
static void exit_with_message(char *func_name, char *ptr_name, char *message)
{
printf("(%s)-(%s): %s\n", func_name, ptr_name, message);
exit(1);
}
void freestr(String *w)
{
if (!w->s)
return;
free(w->s);
if (w->s != NULL)
w->s = NULL;
}
String concat(const String *w1, const String *w2)
{
size_t i, j;
String newString;
char *buff = malloc(sizeof(*buff) * (w1->length + w2->length + 1));
if (!buff)
// 'catch' oom error
goto oom_error;
i = j = 0;
// copy over old strings to the new buffer
while (i < w1->length) {
buff[i] = w1->s[i];
i++;
}
while (j < w2->length) {
buff[i++] = w2->s[j++];
}
// set final character
buff[i] = '0円';
newString.s = buff;
newString.length = w1->length + w2->length;
// free strings in w1, w2
freestr((String *)w1);
freestr((String *)w2);
return newString;
oom_error:
// free strings before exiting
freestr((String *)w1);
freestr((String *)w2);
exit_with_message("concat", "buff", OOM);
// return empty string for the compiler, does not matter as we will exit anyways
return (String){0, NULL};
}
String concatRaw(const String *w1, char w2[])
{
// turn w2 to a proper string, then concatenate
// I guess this can be done by the user - although the option is nice to have
String n = newstr(w2);
return concat(w1, &n);
}
int startswith(const String *haystack, char n[])
{
// we create the needle string in here so we can free it after the
// comparison
String needle = newstr(n);
int result = 0;
if (needle.length <= haystack->length) {
// we need to create this tmp variable as we'll be moving through the
// string using pointers and in order to successfully free the string the
// pointer variable needs to point to the beginning, otherwise it
// causes a memory free error.
char *tmp = needle.s;
for (size_t i = 0; i < needle.length; i++, tmp++) {
if (*tmp != haystack->s[i])
break;
}
// if we got to the end of the needle, result == 1, else 0
result = (*tmp == '0円');
}
// free the string contents of the needle
freestr(&needle);
return result;
}
char charat(const String *str, size_t index)
{
// off by one error means last item is (s->length - 1)
if (index >= str->length)
exit_with_message("charat", "None", "Out of bounds error");
char val = str->s[index];
return val;
}
String newstr(char word[])
{
String newString;
size_t length = 0;
while (word[length] != '0円')
length++;
// using *buff in the sizeof function 'locks' together the declaration and
// type information i.e. if buff type changes, the malloc does not need to
// change as well. This gives less room for errors.
// more info: https://stackoverflow.com/a/605858
char *buff = malloc(sizeof(*buff) * length + 1);
if (!buff)
exit_with_message("newstr", "buff", OOM);
size_t i = 0;
// copy word to buffer
while ((buff[i] = *word++) != '0円')
i++;
buff[i] = '0円';
newString.s = buff;
newString.length = length;
return newString;
}
test_mystring.c
/* String tests. */
#include <assert.h>
#include <stdio.h>
#include "mystring.h"
/* utility function to ensure two strings are the same */
int issame(const char w1[], const char w2[])
{
while (*w1 != '0円' && *w2 != '0円' && *w1 == *w2) {
w1++;
w2++;
}
if (*w1 != '0円' || *w2 != '0円')
return 0;
return 1;
}
void test_issame()
{
assert(issame("hello", "hello"));
assert(issame("hello world!!", "hello world!!"));
assert(!issame("", "hello"));
assert(!issame("hello", ""));
assert(!issame("hello ", "hello"));
assert(!issame("hello", "bello"));
assert(!issame("hello", "hella"));
assert(!issame("hello", "hell"));
assert(!issame("hell", "hello"));
assert(!issame("aello", "hello"));
}
void test_newstr_length()
{
assert(newstr("hello").length == 5);
assert(!(newstr("hello").length == 6));
assert(!(newstr("hello").length == 4));
assert(newstr("world").length == 5);
assert(newstr("hello world!!").length == 13);
assert(newstr(" hello world").length == 21);
}
void test_newstr_actual_string()
{
assert(issame(newstr("hello").s, "hello"));
assert(issame(newstr("This is a much longer word").s, "This is a much longer word"));
assert(!issame(newstr("hello ").s, "hello"));
assert(!issame(newstr("hello").s, "bello"));
assert(!issame(newstr("hhello").s, "hello"));
assert(!issame(newstr(" hello").s, "hello"));
// empty string
assert(issame(newstr("").s, ""));
}
void test_newstr()
{
test_newstr_length();
test_newstr_actual_string();
}
void test_concat__basic()
{
String w1, w2;
w1 = newstr("this is ");
w2 = newstr("a concatenated string");
assert(issame(concat(&w1, &w2).s, "this is a concatenated string"));
w1 = newstr("");
w2 = newstr("a concatenated string");
assert(issame(concat(&w1, &w2).s, "a concatenated string"));
w1 = newstr("this is ");
w2 = newstr("a concatenated string");
assert(!issame(concat(&w1, &w2).s, "this is a concatenated"));
}
void test_concatRaw__basic()
{
String w1 = newstr("hello world");
size_t len = w1.length;
w1 = concatRaw(&w1, "!!");
assert(w1.length == (len + 2));
assert(issame(w1.s, "hello world!!"));
}
void test_concatRaw()
{
char *t[] = {"a", "b", "c"};
String w1 = newstr("");
for (int i = 0; i < 3; i++)
w1 = concatRaw(&w1, t[i]);
assert(w1.length == 3);
assert(issame(w1.s, "abc"));
}
void test_concatRaw__stress()
{
String w1 = newstr("");
for (int i = 0; i < 1000; i++)
w1 = concatRaw(&w1, "a");
assert(w1.length == 1000);
}
void test_concat()
{
test_concat__basic();
test_concatRaw__basic();
test_concatRaw__stress();
test_concatRaw();
}
void test_startswith()
{
String w1 = newstr("// this is a comment");
assert(startswith(&w1, "//"));
assert(!startswith(&w1, " /"));
String w2 = newstr("hello world!");
assert(startswith(&w2, "h"));
assert(startswith(&w2, "hell"));
assert(startswith(&w2, "hello"));
assert(!startswith(&w2, "bello"));
assert(!startswith(&w2, "helo"));
String w3 = newstr("");
assert(startswith(&w3, ""));
assert(!startswith(&w3, " "));
freestr(&w1);
freestr(&w2);
freestr(&w3);
}
void test_charat()
{
String w1 = newstr("// this is a comment");
assert(charat(&w1, (size_t)0) == '/');
assert(charat(&w1, (size_t)9) == 's');
assert(charat(&w1, (size_t)10) == ' ');
assert(charat(&w1, w1.length - 1) == 't');
// this successfully exists
// assert(charat(&w1, w1.length) == 't');
}
void tests()
{
test_issame();
test_newstr();
test_concat();
test_startswith();
test_charat();
}
int main()
{
tests();
printf("----- STRING TESTS PASS ------\n");
return 0;
}
3 Answers 3
Overview:
You don't keep the String
consistent when you free the string. You set the buffer to NULL but the length is not reset. So if you now use this with any function that indexes into the string it still looks like a valid string but you get undefined behavior.
I find your interface a bit confusing. You return objects but you expect the user to pass in pointers to the function. It should be one or the other. Either always use pointers or always use objects.
Another issue is that you return the object by value. This sort of implies that your user can copy the object. And they will. So you will then have multiple objects that have the underlying same string in them. If one of these copies is freed then all the others become invalid but will still have pointers in internally that look good. I suppose that passing pointers around has the same issue (but that's a problem with C).
Note:
A common pattern for handling this is to allocate the block and the array into a single object.
PS. C has extensive and robust string handling please use it rather than re-creating the wheel. It may be flawed but we know the flaws and you can use it in your library safely.
Its also good to use as the standard library is highly optimized.
typedef struct {
size_t length;
char buffer[0];
} String;
String* newString(char* str)
{
size_t strLen = str == NULL ? 0 : strlen(str);
// Allocate room for the length and the buffer in a single object.
String* result = malloc(sizeof(size_t) + sizeof(char) * strLen + 1);
if (result == NULL) {
exit_with_message("newstr", "buff", OOM);
}
if (std == NULL) {
result.buffer[0] = '0円';
} else {
strcpy(result.buffer, str);
}
return result;
}
Code Review:
Not Sure if this is UNIQUE enough.
#ifndef MYSTRING
#define MYSTRING
I can see many people using this guard. Try and make it unique incase this gets includes into a project with other people's code.
I know C is not perfect with type safety but you could make s
a char const*
so you don't accidentally modify the string.
typedef struct {
size_t length;
char *s;
} String;
Here you are returning a String
object.
String newstr(char *);
But here you pass in String objects by pointer. Why not pass the object and stay consistent on usage.
String concat(const String *, const String *);
I know that in the function you deallocate the input parameters, but I question that down below.
Again passing String
by pointer when you return by object.
String concatRaw(const String *, char *);
C does have a boolean type.
int startswith(const String *, char *prefix);
Here I will defer to the people better at C than me. But if you have a boolean type why not use it rather than being vague with an int
.
void freestr(String *w)
{
// Sure nice shortcut out.
if (!w->s)
return;
free(w->s);
// No need to do this check.
// You already did this check above.
if (w->s != NULL)
// Why only set the s pointer.
// Set the length as well this will prevent
// accidents where you try and read from the string
// after you have called free.
w->s = NULL;
}
String concat(const String *w1, const String *w2)
{
// Declare variables as close to the point of use as possible.
// Also intialize them on creation if possible to avoid accidental errors.
// I know that in original C you had to put the variables at the
// top of the function. But in modern C that is no longer true.
size_t i, j;
String newString;
Use the C string libraries.
// Simpler to use strcpy()
while (i < w1->length) {
buff[i] = w1->s[i];
i++;
}
// Simpler to use strcat()
while (j < w2->length) {
buff[i++] = w2->s[j++];
}
Why are you freeing the input parameters?
// free strings in w1, w2
freestr((String *)w1);
freestr((String *)w2);
Does not seem logical that just because you append that the original values are not useful.
This is very inefficient!
String concatRaw(const String *w1, char w2[])
{
// turn w2 to a proper string, then concatenate
// I guess this can be done by the user - although the option is nice to have
String n = newstr(w2);
return concat(w1, &n);
}
You allocate an object just so you can call concat()
. Why not do it the other way?
If you define concat()
in terms of concatRaw()
both become efficient.
NOTE: Here I am re-arranging your functions as is. Not looking to add in code of my own. So this example is just to show how the concat()
and concatRaw()
can be effecient and defined in terms of each other.
String concat(const String *w1, const String *w2)
{
String result = concatRawInternal(w1, w2->s, w2->length);
if (result.s != NULL) {
freestr((String *)w2);
}
return result;
}
String concatRaw(const String *w1, char w2[])
{
size_t length = 0;
while (w2[length] != '0円')
length++;
return concatRawInternal(w1, w2, length);
}
String concatRawInternal(const String *w1, char w2[], size_t w2Len)
{
size_t i, j;
String newString;
char *buff = malloc(sizeof(*buff) * (w1->length + w2Len + 1));
if (!buff)
// 'catch' oom error
goto oom_error;
i = j = 0;
// copy over old strings to the new buffer
while (i < w1->length) {
buff[i] = w1->s[i];
i++;
}
while (j < w2Len) {
buff[i++] = w2->s[j++];
}
// set final character
buff[i] = '0円';
newString.s = buff;
newString.length = w1->length + w2->length;
// free strings in w1, w2
freestr((String *)w1);
return newString;
oom_error:
// free strings before exiting
freestr((String *)w1);
freestr((String *)w2);
exit_with_message("concat", "buff", OOM);
// return empty string for the compiler, does not matter as we will exit anyways
return (String){0, NULL};
}
int startswith(const String *haystack, char n[])
{
// we create the needle string in here so we can free it after the
// comparison
Don't think you "NEED" to create a needle
. Seem like it would be simpler and more effecient not to create it.
String needle = newstr(n);
-
\$\begingroup\$
strcpy()
+strcat()
can bestrcpy()
+strcpy()
, or evenmemcpy()
+memcpy()
, since we know the offset at which we are to append. That saves traversing the first string twice. \$\endgroup\$Toby Speight– Toby Speight2024年08月22日 08:41:16 +00:00Commented Aug 22, 2024 at 8:41 -
\$\begingroup\$ @Loki "could make s a char const* so you don't accidentally modify the string." --> then how to free the string with a
const char *
needs to be shown and explained why it is not UB to free aconst char *
. \$\endgroup\$chux– chux2024年08月22日 14:26:28 +00:00Commented Aug 22, 2024 at 14:26 -
\$\begingroup\$ Firstly, thanks for the extensive review. I'm going to respond across a couple of different comments as I have a 600-ish character limit on comments. RE passing pointers but returning objects, what you said makes sense. In fact, initially, I was passing the string to each function by value but then I came across these SO questions[1] and it made me think I should change them. I think I'll go back to the original of passing by value. [1] stackoverflow.com/q/161788 \$\endgroup\$arm93– arm932024年08月22日 20:09:13 +00:00Commented Aug 22, 2024 at 20:09
-
\$\begingroup\$ RE allocating the object and array in one. I've come across the concept of flexible arrays in structs but reading this[1] made it seem like it is generally considered bad practice.
Dennis Ritchie has called it "unwarranted chumminess with the C implementation"
, but maybe that opinion is now outdated?? [1] c-faq.com/struct/structhack.html \$\endgroup\$arm93– arm932024年08月22日 20:14:26 +00:00Commented Aug 22, 2024 at 20:14 -
\$\begingroup\$ RE concat freeing the strings. This seems to be something that all the reviewers picked up on. I think I was probably looking at this wrong but my initial reason was because I wanted the string to be immutable and by creating a new string, from the concatenated strings, it meant the initial two strings are no longer needed. I also wanted to make memory management requirements for the user a bit easier to handle. But from this review, I see the scenario that the string is referenced by other variables and once freed, all references suddenly become invalid. I'll probably need to rethink that. \$\endgroup\$arm93– arm932024年08月22日 20:24:28 +00:00Commented Aug 22, 2024 at 20:24
static void exit_with_message(char *func_name, char *ptr_name, char *message) { printf("(%s)-(%s): %s\n", func_name, ptr_name, message); exit(1); }
We should annotate this function with [[noreturn]]
.
The arguments should be const char*
so that we can call with string literals (as indeed we do) without swamping us with compiler warnings (if you've not enabled such warnings, I recommend you do - they can catch serious bugs).
Error messages should go to standard error stream:
fprintf(stderr, "(%s)-(%s): %s\n", func_name, ptr_name, message);
If we include <stdlib.h>
, we can use a more descriptive exit status:
exit(EXIT_FAILURE);
void freestr(String *w) { if (!w->s) return; free(w->s); if (w->s != NULL) w->s = NULL; }
Both of the tests are unnecessary - if we remove them, the behaviour is unchanged. However, it may be a good idea to check that w
is not null before accessing its members:
void freestr(String *w)
{
if (!w) {
return;
}
free(w->s);
w->s = NULL;
w->length = 0;
}
size_t i, j; String newString;
Prefer to defer declaration of locals to the point where they can be initialised. It's no longer necessary to declare all variables at the start of their scope.
char *buff = malloc(sizeof(*buff) * (w1->length + w2->length + 1));
Multiplication by sizeof *buff
may seem redundant given that sizeof (char)
is necessarily 1
. However, it's always good practice to tie size calculations with the size of the type like that (though I'd change the parens to (sizeof *buff)
for clarity), as it can help facilitate code re-use, e.g. with wide strings.
After making the code const-correct, fix the memory leaks:
gcc-14 -std=c23 -Wall -Wextra -Wwrite-strings -Wno-parentheses -Wpedantic -Warray-bounds -Wmissing-braces -Wconversion -Wstrict-prototypes -fanalyzer 293412.c -o 293412
293412.c: In function ‘test_newstr_length’:
293412.c:241:1: warning: leak of ‘<U7510>.s’ [CWE-401] [-Wanalyzer-malloc-leak]
241 | }
| ^
‘main’: events 1-2
|
| 374 | int main()
| | ^~~~
| | |
| | (1) entry to ‘main’
| 375 | {
| 376 | tests();
| | ~~~~~~~
| | |
| | (2) calling ‘tests’ from ‘main’
|
+--> ‘tests’: events 3-4
(much snipped to fit within max answer length)
| | (245) following ‘false’ branch...
|......
| 214 | return 1;
| | ~
| | |
| | (248) ...to here
|
<------+
|
‘test_newstr_actual_string’: event 249
|
| 254 | assert(issame(newstr("").s, ""));
| | ^
| | |
| | (249) returning to ‘test_newstr_actual_string’ from ‘issame’
|
‘test_newstr_actual_string’: event 250
|
| 254 | assert(issame(newstr("").s, ""));
| | ^~~~~~
| | |
| | (250) following ‘false’ branch...
|
‘test_newstr_actual_string’: events 251-252
|
| 255 | }
| | ^
| | |
| | (251) ...to here
| | (252) ‘<Uf630>.s’ leaks here; was allocated at (236)
|
valgrind --leak-check=full ./293412
=わ=わ2357414== Memcheck, a memory error detector
==2357414== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==2357414== Using Valgrind-3.20.0 and LibVEX; rerun with -h for copyright info
==2357414== Command: ./293412
==2357414==
----- STRING TESTS PASS ------
==2357414=わ=わ
=わ=わ2357414== HEAP SUMMARY:
==2357414== in use at exit: 1,243 bytes in 20 blocks
==2357414== total heap usage: 2,047 allocs, 2,027 frees, 504,947 bytes allocated
==2357414=わ=わ
=わ=わ2357414=わ=わ 1 bytes in 1 blocks are definitely lost in loss record 1 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x109BE6: test_newstr_actual_string (293412.c:254)
==2357414== by 0x109C35: test_newstr (293412.c:261)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 4 bytes in 1 blocks are definitely lost in loss record 2 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x109241: concat (293412.c:76)
==2357414== by 0x1093AD: concatRaw (293412.c:120)
==2357414== by 0x109EE5: test_concatRaw (293412.c:302)
==2357414== by 0x10A010: test_concat (293412.c:325)
==2357414== by 0x10A408: tests (293412.c:368)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 6 bytes in 1 blocks are definitely lost in loss record 3 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x1098A3: test_newstr_length (293412.c:235)
==2357414== by 0x109C30: test_newstr (293412.c:260)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 6 bytes in 1 blocks are definitely lost in loss record 4 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x1098E0: test_newstr_length (293412.c:236)
==2357414== by 0x109C30: test_newstr (293412.c:260)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 6 bytes in 1 blocks are definitely lost in loss record 5 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x10991D: test_newstr_length (293412.c:237)
==2357414== by 0x109C30: test_newstr (293412.c:260)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 6 bytes in 1 blocks are definitely lost in loss record 6 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x10995A: test_newstr_length (293412.c:238)
==2357414== by 0x109C30: test_newstr (293412.c:260)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 6 bytes in 1 blocks are definitely lost in loss record 7 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x109A18: test_newstr_actual_string (293412.c:246)
==2357414== by 0x109C35: test_newstr (293412.c:261)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 6 bytes in 1 blocks are definitely lost in loss record 8 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x109AFF: test_newstr_actual_string (293412.c:249)
==2357414== by 0x109C35: test_newstr (293412.c:261)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 7 bytes in 1 blocks are definitely lost in loss record 9 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x109AB2: test_newstr_actual_string (293412.c:248)
==2357414== by 0x109C35: test_newstr (293412.c:261)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 7 bytes in 1 blocks are definitely lost in loss record 10 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x109B4C: test_newstr_actual_string (293412.c:250)
==2357414== by 0x109C35: test_newstr (293412.c:261)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 7 bytes in 1 blocks are definitely lost in loss record 11 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x109B99: test_newstr_actual_string (293412.c:251)
==2357414== by 0x109C35: test_newstr (293412.c:261)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 14 bytes in 1 blocks are definitely lost in loss record 12 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x109997: test_newstr_length (293412.c:239)
==2357414== by 0x109C30: test_newstr (293412.c:260)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 14 bytes in 1 blocks are definitely lost in loss record 13 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x109241: concat (293412.c:76)
==2357414== by 0x1093AD: concatRaw (293412.c:120)
==2357414== by 0x109DFD: test_concatRaw__basic (293412.c:288)
==2357414== by 0x10A006: test_concat (293412.c:323)
==2357414== by 0x10A408: tests (293412.c:368)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 21 bytes in 1 blocks are definitely lost in loss record 14 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x10A2F0: test_charat (293412.c:354)
==2357414== by 0x10A412: tests (293412.c:370)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 22 bytes in 1 blocks are definitely lost in loss record 15 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x1099D4: test_newstr_length (293412.c:240)
==2357414== by 0x109C30: test_newstr (293412.c:260)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 22 bytes in 1 blocks are definitely lost in loss record 16 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x109241: concat (293412.c:76)
==2357414== by 0x109D00: test_concat__basic (293412.c:275)
==2357414== by 0x10A001: test_concat (293412.c:322)
==2357414== by 0x10A408: tests (293412.c:368)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 27 bytes in 1 blocks are definitely lost in loss record 17 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x1094EB: newstr (293412.c:177)
==2357414== by 0x109A65: test_newstr_actual_string (293412.c:247)
==2357414== by 0x109C35: test_newstr (293412.c:261)
==2357414== by 0x10A403: tests (293412.c:367)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 30 bytes in 1 blocks are definitely lost in loss record 18 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x109241: concat (293412.c:76)
==2357414== by 0x109C81: test_concat__basic (293412.c:271)
==2357414== by 0x10A001: test_concat (293412.c:322)
==2357414== by 0x10A408: tests (293412.c:368)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 30 bytes in 1 blocks are definitely lost in loss record 19 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x109241: concat (293412.c:76)
==2357414== by 0x109D7F: test_concat__basic (293412.c:279)
==2357414== by 0x10A001: test_concat (293412.c:322)
==2357414== by 0x10A408: tests (293412.c:368)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414=わ=わ 1,001 bytes in 1 blocks are definitely lost in loss record 20 of 20
==2357414== at 0x4842808: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==2357414== by 0x109241: concat (293412.c:76)
==2357414== by 0x1093AD: concatRaw (293412.c:120)
==2357414== by 0x109FAC: test_concatRaw__stress (293412.c:314)
==2357414== by 0x10A00B: test_concat (293412.c:324)
==2357414== by 0x10A408: tests (293412.c:368)
==2357414== by 0x10A41E: main (293412.c:376)
==2357414=わ=わ
=わ=わ2357414== LEAK SUMMARY:
==2357414== definitely lost: 1,243 bytes in 20 blocks
==2357414== indirectly lost: 0 bytes in 0 blocks
==2357414== possibly lost: 0 bytes in 0 blocks
==2357414== still reachable: 0 bytes in 0 blocks
==2357414== suppressed: 0 bytes in 0 blocks
==2357414=わ=わ
=わ=わ2357414== For lists of detected and suppressed errors, rerun with: -s
==2357414== ERROR SUMMARY: 20 errors from 20 contexts (suppressed: 0 from 0)
And everything in Loki's answer.
-
1\$\begingroup\$ "We should annotate this function with [[noreturn]]" --->
[[noreturn]]
is not part of standard C, yet. \$\endgroup\$chux– chux2024年08月22日 14:15:25 +00:00Commented Aug 22, 2024 at 14:15 -
\$\begingroup\$
[[noreturn]]
was introduced in C23, I'm sure. \$\endgroup\$Toby Speight– Toby Speight2024年08月22日 14:44:48 +00:00Commented Aug 22, 2024 at 14:44 -
1\$\begingroup\$ @Yet C23 is not yet released. \$\endgroup\$chux– chux2024年08月22日 14:46:50 +00:00Commented Aug 22, 2024 at 14:46
-
\$\begingroup\$ Thanks for the great review! The error-handling stuff is very useful.
Multiplication by sizeof *buff is redundant given that sizeof (char) is necessarily 1.
. The dereferencing of*buff
insizeof
comes from this SO answer [1]. The suggestion being that it "locks" together the type of the variable and the type insizeof
. [1] stackoverflow.com/a/605858 \$\endgroup\$arm93– arm932024年08月22日 20:34:25 +00:00Commented Aug 22, 2024 at 20:34 -
1\$\begingroup\$ @chux, arm93's code does correctly use
sizeof *buf
; I've re-written that paragraph. \$\endgroup\$Toby Speight– Toby Speight2024年08月27日 06:45:34 +00:00Commented Aug 27, 2024 at 6:45
Lots of good reviews already done.
I'll add some points only about concat()
- many of these points apply to the overall code.
Concatenation detail
Make clear this function is doing a string1 + string2 --> string3
operation and not a string1 += string2
one. This 2nd form is what strcat()
does.
Name space
concat()
is too common and risks colliding in a larger application.
Consider a prefix that matches the *.h
file as in
mystring_concat()
IMO, the type String
should match the *.h
too. Maybe move from mystring.*
and String
to armstr
or the like.
Keep it constant
Code uses concat(const String *w1, const String *w2)
yet then later frees w1
, w2
. IMO, this is a very bad design choice. Do not call freestr((String *)w1); freestr((String *)w2);
Consider the call concat(&w1, &w1)
would attempt to free w1
twice.
If concat()
is still to free memory of w1, w2
, then the signature should be concat(String *w1, String *w2)
.
Initialize when declared
// size_t i, j;
...
// i = j = 0;
...
size_t i = 0;
size_t j = 0;
Use memcpy()
Since we know the length to copy memcpy()
has the best chance of being most performant.
//i = j = 0;
//while (i < w1->length) {
// buff[i] = w1->s[i];
// i++;
//}
// while (j < w2->length) {
// buff[i++] = w2->s[j++];
//}
//buff[i] = '0円';
memcpy(buff, w1->s, w1->length);
memcpy(buff + w1->length, w2->s, w2->length + 1);
// or if a str...() fan
strcpy(buff, w1->s);
strcpy(buff + w1->length, w2->s); // not strcat()
Call by value
The use of this string routines would be simpler with direct object passing.
// String concat(const String *w1, const String *w2)
String concat(String w1, String w2)
Pedantic, avoid overflow
w1->length + w2->length + 1
may overflow size_t
math.
It is easy to check.
if (w1->length >= SIZE_MAX - w2->length) {
Handle_overflow_with_TBD_code();
}
Consider not killing code on select errors
In general, applications prefer to handle code exit themselves and not have some helper utilities do it for them.
Instead, on error, just return (String){0, NULL};
.
This does imply these string routines need to handle return (String){0, NULL}
as an argument.
Sample
String mystring_concat(String w1, String w2) {
String newString;
if (w1.s == NULL || w2.s == NULL ||
w1.length >= SIZE_MAX/sizeof newString.s[0] - w2.length) {
return (String){0, NULL};
}
newString.length = w1.length + w2.length;
newString.s = malloc(sizeof newString.s[0] * (newString.length + 1));
if (newString.s == NULL) {
return (String){0, NULL};
}
memcpy(newString.s, w1.s, w1.length);
memcpy(newString.s + w1.length, w2.s, w2.length + 1);
return newString;
}
-
\$\begingroup\$ Thanks for the great review, its much appreciated. Good shout with the potential naming collisions, I'll change those. RE pass by value/reference and freeing the strings, I spoke about my initial rationale for both of those here [1] and here[2]. [1] codereview.stackexchange.com/questions/293412/… [2] codereview.stackexchange.com/questions/293412/… \$\endgroup\$arm93– arm932024年08月22日 20:46:38 +00:00Commented Aug 22, 2024 at 20:46
-
\$\begingroup\$
Consider the call concat(&w1, &w1) would attempt to free w1 twice.
Hopefully the guard clause in thefreestr
would prevent anything bad :') \$\endgroup\$arm93– arm932024年08月22日 20:47:32 +00:00Commented Aug 22, 2024 at 20:47 -
\$\begingroup\$
Instead, on error, just return (String){0, NULL};
I think this is a valid argument. The main reason for exiting was down to "if you're out of memory, you've got big problems, abort". Coming from higher level languages its not an error I've had to consider before, so I wasn't really sure how to handle it or what the best practices are tbh. \$\endgroup\$arm93– arm932024年08月22日 20:50:07 +00:00Commented Aug 22, 2024 at 20:50 -
1\$\begingroup\$ @Fe2O3
w1.length >= SIZE_MAX/sizeof newString.s[0] - w2.length
is to test if later codew1.length + w2.length
orsizeof newString.s[0] * (newString.length + 1)
would overflow. On review I'd still think its a correct test. Your thoughts? \$\endgroup\$chux– chux2024年08月27日 00:45:24 +00:00Commented Aug 27, 2024 at 0:45 -
1
strlen
once. Then you can usestrlen
in your code. No need to rewritewhile (word[length] != '0円') length++;
again and again and again forever in every function that needs to know a string's length. \$\endgroup\$