This function str_join()
is an updated version of my earlier function strcat_new()
— found here: strcat_new()
function, not present in the Standard C library.
The older version strcat_new()
had a bug that is fixed in this version of str_join()
.
Syntax: char *str_join(const char *delim, long num_args, ...);
str_join.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "str_join.h"
/*
* str_join:
*
* Parameters:
* num_args: number of variable arguments that are passed to this function
* excluding the 'delim' string.
* ...: Variable number of pointers to character arrays.
*
* Description:
* str_join concatenates all the strings/character arrays passed to it. If
* 'delim' is not NULL then after every string, the 'delim' string is concatenated.
* It allocates a new character array whose size is equal to the sum of the
* lengths of all strings passed to it plus 1 (extra 1 for terminating null byte).
* It then concatenates all the strings passed to it separated by 'delim' string
* into the newly allocated character array and then returns the pointer to
* the newly allocated character array. If memory allocation fails then NULL is returned.
*
* It is the responsibility of the caller to free the allocated memory.
*/
char *str_join(const char *delim, long num_args, ...)
{
va_list valist;
long i = 0;
long j = 0;
long iica = 0; // iica - index into character array
long len = 0;
long delim_len = 0;
long total_len = 0;
long num_delim_to_concat = -1;
char *new_char_array = NULL;
char *temp = NULL;
if (num_args <= 0)
return NULL;
if (delim) {
delim_len = strlen(delim);
}
va_start(valist, num_args);
for (i = 0; i < num_args; i++) {
temp = va_arg(valist, char *);
if (!temp) continue;
total_len = total_len + strlen(temp);
num_delim_to_concat = num_delim_to_concat + 1;
}
va_end(valist);
if (total_len == 0)
return NULL;
total_len = total_len + (num_delim_to_concat) * (delim_len);
total_len = total_len + 1; // 1 extra for terminating null byte
new_char_array = malloc(total_len);
if (!new_char_array)
return NULL;
va_start(valist, num_args);
for (i = 0; i < num_args; i++) {
temp = va_arg(valist, char *);
if (!temp) continue;
len = strlen(temp);
for (j = 0; j < len; j++) {
new_char_array[iica] = temp[j];
iica++;
}
if (num_delim_to_concat > 0) {
for (j = 0; j < delim_len; j++) {
new_char_array[iica] = delim[j];
iica++;
}
num_delim_to_concat = num_delim_to_concat - 1;
}
}
va_end(valist);
new_char_array[iica] = 0;
return new_char_array;
} // end of str_join
str_join.h
#ifndef _STRCAT_NEW_H_
#define _STRCAT_NEW_H_
/*
* str_join:
*
* Parameters:
* num_args: number of variable arguments that are passed to this function
* excluding the 'delim' string.
* ...: Variable number of pointers to character arrays.
*
* Description:
* str_join concatenates all the strings/character arrays passed to it. If
* 'delim' is not NULL then after every string, the 'delim' string is concatenated.
* It allocates a new character array whose size is equal to the sum of the
* lengths of all strings passed to it plus 1 (extra 1 for terminating null byte).
* It then concatenates all the strings passed to it separated by 'delim' string
* into the newly allocated character array and then returns the pointer to
* the newly allocated character array. If memory allocation fails then NULL is returned.
*
* It is the responsibility of the caller to free the allocated memory.
*/
char *str_join(const char *delim, long num_args, ...);
#endif
test_str_join.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "str_join.h"
int main(void)
{
char *a = str_join(";:?", 4, "abc", "123", "xyz", "455");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, strlen(a));
free(a);
a = str_join(NULL, 3, "123", "xyz", "455");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, strlen(a));
free(a);
a = str_join(NULL, 0, "123", "xyz", "455");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, a?strlen(a):(unsigned long)(-1));
free(a);
a = str_join(NULL, -1, "123", "xyz", "455");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, a?strlen(a):(unsigned long)(-1));
free(a);
a = str_join("=", 4, "abc", "123", "xyz", "455");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, strlen(a));
free(a);
a = str_join("{(=%$^%^&&(&)}", 4, "abc", "123", "xyz", "455");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, strlen(a));
free(a);
a = str_join(";;;", 4, "abc", NULL, NULL, NULL);
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, strlen(a));
free(a);
a = str_join(";;;", 4, "abc", NULL, NULL, "cde");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, strlen(a));
free(a);
a = str_join(NULL, 4, NULL, "fdr", NULL, NULL);
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, a?strlen(a):(unsigned long)(-1));
free(a);
a = str_join("-------", 10, "START", "hfjkdhf78578695590", "nvmcnkjfhgkjfh^&*^&*", "7856475947584", "!@^&^%#*&(*$&(&$(*", "j", "4", "khfkjhgiury6985768956uirhtukrhgkhfk", "jfjlkjgk65876895769576895khgkjfkkhkjhjG%^&%#*@^*@^$*&^#&$*#$#*$&#$&$(*#", "END");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, a?strlen(a):(unsigned long)(-1));
free(a);
a = str_join(NULL, 1, NULL, "fdr", NULL, NULL);
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, a?strlen(a):(unsigned long)(-1));
free(a);
a = str_join(";", 4, NULL, NULL, NULL, "xyz");
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, a?strlen(a):(unsigned long)(-1));
free(a);
a = str_join(";;;", 4, NULL, NULL, NULL, NULL);
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, a?strlen(a):(unsigned long)(-1));
free(a);
a = str_join(NULL, 4, NULL, NULL, NULL, NULL);
printf("\n");
printf("a = %s, strlen(a) = %lu\n", a, a?strlen(a):(unsigned long)(-1));
free(a);
printf("\n");
}
2 Answers 2
No test reference
There are no tests in test_str_join.c
, just code being exercised. How is the test to detect if output is correct? If by visual inspection, including comments of expected output would at least be a good start.
Undefined behavior printing a null pointer
Example: last test may print a = (null), strlen(a) = 18446744073709551615
or crash or ... It is UB.
NULL
is not safe as a ...
argument
The type of NULL
is void *
, int
, long
, some integer type. It is not safe to fetch via va_arg(valist, char *)
.
Better test code would use (void *) NULL
, (void *) 0
.
Wrong type
Types used for string length are as if long
and size_t
were the similar types. size_t
is the type returned by strlen()
and the type used in allocations. size_t
is the Goldilocks type for array indexing, neither too narrow nor too wide. long
is signed, size_t
is unsigned. LONG_MAX
may be less than or more than (or even the same as) SIZE_MAX
. long
usage here remains questionable.
If you want to remain with long
, use len = (long) strlen(temp);
to quiet warnings like :
warning: conversion to 'size_t' from 'long int' may change the sign of the result [-Wsign-conversion]
Why delimiter after last?
Goal is "after every string, the 'delim' string is concatenated". Then later says "separated by 'delim' string". This code apparently only delimits between strings. Comments need revision for consistency.
Zero num_args
I still maintain a return of allocated ""
is expected for char *str_join(",", 0)
rather than NULL
of this code.
Repeated comments
Parameters: and Description: are repeated in the *.c and *.h files. Once is enough - best in the .h file. Consider code maintenance is more expensive that original code writing and maintaining same comments in 2 files is additional unnecessary work - likely to diverge over time.
Print formed string with sentinels
Easier to identify unexpected leading and trailing white-space.
// printf("a = %s ...
printf("a = \"%s\" ...
Simplify
To code a (unsigned long)(-1)
, easy to use -1UL
.
Define when needed
Rather than define an object far from its use and at a higher scope than needed:
//char *temp = NULL;
//... 8 lines
//for (i = 0; i < num_args; i++) {
// temp = va_arg(valist, char *);
// if (!temp) continue;
// total_len = total_len + strlen(temp);
for (i = 0; i < num_args; i++) {
char *temp = va_arg(valist, char *);
if (!temp) continue;
total_len = total_len + strlen(temp);
Minor: avoid !
I have found less misunderstandings when !
, !=
are avoided. Consider alternatives.
// if (!temp) continue;
if (temp == NULL) continue;
2nd posting without finishing first.
Consider wrapping up earlier related questions before posing an updated question.
unfinished before new business.
-
1\$\begingroup\$ Maybe add: Contradictory description: Is
delim
inserted between any two, or after every single argument... \$\endgroup\$Deduplicator– Deduplicator2021年12月07日 23:57:45 +00:00Commented Dec 7, 2021 at 23:57 -
-
\$\begingroup\$ I will fix the description. Basically, the delim is between valid (non-null and non-empty) strings (arguments). I also need to fix the empty string(s) case - if the user passes empty string(s) then these empty string(s) will not be concatenated/joined and will be ignored. \$\endgroup\$user245050– user2450502021年12月31日 07:22:45 +00:00Commented Dec 31, 2021 at 7:22
-
\$\begingroup\$ Chux, on your comment:
Undefined behavior printing a null pointer
- how will it crash? How is it unbounded? \$\endgroup\$user245050– user2450502021年12月31日 07:36:33 +00:00Commented Dec 31, 2021 at 7:36 -
1\$\begingroup\$ @Amit, re "Undefined behavior printing a null pointer - how will it crash? How is it unbounded?" to attempt to use
"%s"
with an invalid pointer -- it is undefined (UB), there is no prescribed way it will crash - it is not defined. It might "work", it might crash in various ways. If you still want to print something reliable whens == NULL
, instead ofprintf("%s\n", s);
useprintf("%s\n", s ? s : "null");
\$\endgroup\$chux– chux2021年12月31日 11:11:22 +00:00Commented Dec 31, 2021 at 11:11
The macro name _STRCAT_NEW_H_
is an identifier reserved for any purpose by the implementation. Do not use identifiers beginning with _
in your own code.
Always include your defining header first in your implementation file, before standard headers. This can help identify accidental dependencies (i.e. creating a header that's not self-sufficient).
Many of the variables have much larger scope than necessary. Reduce scope, and delay declaration to where the variable can be initialised.
Avoid using signed types where unsigned makes more sense. For example, num_args
should be a size_t
, or possibly unsigned
given that we should not be passing 65536 or more arguments in a function call. Certainly the "length" accumulators should be size_t
.
The local variables for the argument strings should be const char*
, because callers should be able to pass non-writable strings.
Don't return a null pointer when the result is an empty string - that just makes life difficult for the caller, and makes it impossible to tell whether the function was successful or not. Be careful that num_delim_to_concat
doesn't remain at -1 in that case, though.
Consider splitting out a function that accepts a va_list
argument (ala vsprintf()
) so that calling code is able to forward argument lists. You'll find this useful for the tests, too.
Use standard functions (strcat
/strcpy
/memcpy
) for writing to the output string, instead of hand-rolling loops. A decent optimising compiler will likely generate the same code for both, but at less risk of programmer error.
Prefer idiomatic increment - e.g. --num_delim_to_concat
rather than num_delim_to_concat = num_delim_to_concat - 1
, and total_len += num_delim_to_concat * delim_len
rather than total_len = total_len + (num_delim_to_concat) * (delim_len)
. Writing these out longhand leaves the reader wondering what's going on.
The tests are repetitive, which suggests that a helper function may be valuable. Also, instead of being self-checking, they require manual inspection, which is time-consuming and error-prone. Rewrite so that the exit status indicates whether the tests passed or not.
Tests pass NULL
as a vararg parameter. This needs to be cast to char*
to be safely transferred.
Tests should help demonstrate how the code is to be used. Therefore, they should test whether a null pointer was returned, rather than assuming a valid object pointer.
-
\$\begingroup\$ Toby, about
Many of the variables have much larger scope than necessary
: I don't like "define/declare variables when needed". This way my variables would be scattered all across the function. I want all my variables at the top - I feel comfortable doing this. Scattering my variables all across the function doesn't make me feel comfortable. All the places I worked (I worked mostly in C), there was no guideline to follow "define/declare when needed". I have seen lots and lots of active C production code which does not follow "define/declare when needed" approach. \$\endgroup\$user245050– user2450502022年01月07日 06:56:27 +00:00Commented Jan 7, 2022 at 6:56 -
\$\begingroup\$ Toby, size_t vs long is debatable/personal preference. I like long instead of size_t. I have no problem if someone doesn't want to use my code because of this. \$\endgroup\$user245050– user2450502022年01月07日 06:59:34 +00:00Commented Jan 7, 2022 at 6:59
-
\$\begingroup\$ Another downside to declaring all variables at start of function scope is the risk of use before assignment. It's possible that the the "lots of production code" was written prior to 1999, when declarations couldn't be intermixed with statements. Using
long
instead ofsize_t
is just wrong (both in range and signedness), and the resulting warnings can obscure other problems in your code (and if you're not getting warnings, you really should be enabling them). \$\endgroup\$Toby Speight– Toby Speight2022年01月07日 08:42:08 +00:00Commented Jan 7, 2022 at 8:42 -
\$\begingroup\$ There is no risk of use before assignment because variables should be initialized at the time of declaration. \$\endgroup\$user245050– user2450502022年01月07日 13:24:52 +00:00Commented Jan 7, 2022 at 13:24
-
\$\begingroup\$ The lots of production code that I saw (that didn't use define/declare when needed) was as late as 2020. \$\endgroup\$user245050– user2450502022年01月07日 13:26:27 +00:00Commented Jan 7, 2022 at 13:26
return 0;
inmain()
. \$\endgroup\$