substr() function, not present in standard C library.
Syntax: char *substr(const char *str, long start_index, long end_index);
The description of function substr() is in the header file "substr.h".
The code was compiled using the following gcc flags:
-Wall -Werror -Wextra -Wundef -Wunreachable-code -Winit-self -Wparentheses -Wconversion -Wsign-conversion -Wsign-compare -Werror-implicit-function-declaration -Wmissing-prototypes -Wmissing-declarations -Wformat-security
The code is below:
substr.c
#include "substr.h"
#include <stdlib.h>
#include <string.h>
/* The description of function substr() is in the header file "substr.h". */
/*
 * Returns a newly allocated, null-terminated copy of str[start_index..end_index]
 * (both ends inclusive), or NULL when the arguments are invalid or the
 * allocation fails. The caller owns the result and must free() it.
 */
char *substr(const char *str, long start_index, long end_index)
{
    /* A missing or empty source string has no valid indices at all. */
    if (str == NULL || str[0] == '\0') {
        return NULL;
    }

    /*
     * Negative or inverted ranges are rejected. Once start_index >= 0 and
     * end_index >= start_index hold, end_index is necessarily >= 0 as well.
     */
    if (start_index < 0 || end_index < start_index) {
        return NULL;
    }

    /*
     * Both indices must lie inside the string. Because start_index <= end_index,
     * bounding end_index also bounds start_index.
     */
    const long src_len = (long)strlen(str);
    if (end_index >= src_len) {
        return NULL;
    }

    const size_t count = (size_t)(end_index - start_index) + 1u;

    char *copy = malloc(count + 1u); /* one extra byte for the terminator */
    if (copy == NULL) {
        return NULL;
    }

    /* The destination was just allocated, so it cannot overlap the source. */
    memcpy(copy, str + start_index, count);
    copy[count] = '\0';

    return copy;
}
substr.h
#ifndef SUBSTR_H
#define SUBSTR_H
/*
 * char *substr(const char *str, long start_index, long end_index):
 *
 * Allocates memory and returns a pointer to a null-terminated string that is
 * a copy of the characters of 'str' from index 'start_index' through index
 * 'end_index', both inclusive. Indices are zero-based.
 *
 * NULL is returned if any of the following holds:
 *   - 'str' is NULL or 'str' is empty;
 *   - 'start_index' is less than 0 or 'end_index' is less than 0;
 *   - 'end_index' is less than 'start_index';
 *   - 'start_index' or 'end_index' is greater than (length of 'str') - 1;
 *   - memory for the substring could not be allocated.
 *
 * The returned pointer points to a memory region that was allocated using
 * malloc, so it is the caller's responsibility to free it.
 */
char *substr(const char *str, long start_index, long end_index);
#endif
test_substr.c
#include "substr.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *get_input_from_stdin_and_discard_extra_characters(char *str, long size);
#define ARRAY_SIZE 256
/*
 * Interactive test driver for substr().
 *
 * Repeatedly asks for a string, a start index and an end index, echoes the
 * parameters, calls substr() and prints the result. Typing the literal text
 * NULL as the string passes a NULL pointer to substr().
 *
 * The loop ends (and the program exits successfully) as soon as stdin reaches
 * end-of-file or reports a read error; otherwise it runs until interrupted.
 */
int main(void)
{
    char str[ARRAY_SIZE] = {0};
    long start_index = -1;
    long end_index = -1;
    char *arg_str = NULL;
    char *sub_str = NULL;

    while (1) {
        arg_str = str;

        /*
         * Start every round from invalid indices so that an unparsable
         * number makes substr() return NULL instead of silently reusing
         * the index typed in the previous round.
         */
        start_index = -1;
        end_index = -1;

        system("clear");

        printf("\nPlease input a string to split (max 255 characters) (To enter NULL"
               " string, type NULL and press ENTER): ");
        get_input_from_stdin_and_discard_extra_characters(str, ARRAY_SIZE);

        /* No more input can ever arrive: stop instead of prompting forever. */
        if (feof(stdin) || ferror(stdin))
            break;

        if (strcmp(str, "NULL") == 0) {
            arg_str = NULL;
        } else {
            if (!*str)
                printf("(Length of string entered = %zu.)\n", strlen(arg_str));
            else
                printf("(Length of string entered = %zu. Index 0 to %zu.)\n", strlen(arg_str), strlen(arg_str) - 1);
        }

        printf("\nPlease input the start index from where to start copying the"
               " substring: ");
        /* EOF means input ended before a number; 0 means the text was not a number. */
        if (scanf("%ld", &start_index) == EOF)
            break;
        // now clear the stdin input buffer
        get_input_from_stdin_and_discard_extra_characters(NULL, 0);

        printf("\nPlease input the end index at which to stop copying the substring: ");
        if (scanf("%ld", &end_index) == EOF)
            break;
        // now clear the stdin input buffer
        get_input_from_stdin_and_discard_extra_characters(NULL, 0);

        printf("\n");
        printf("\n-----------------");
        printf("\nInput parameters:");
        printf("\n-----------------\n");
        printf("str = \"%s\"\n", arg_str?arg_str:"(null string)");
        printf("start index = %ld\n", start_index);
        printf("end index = %ld\n", end_index);

        printf("\n");
        printf("\n-------");
        printf("\nResult:");
        printf("\n-------\n");

        sub_str = substr(arg_str, start_index, end_index);
        if (sub_str) {
            printf("substr = \"%s\"\n\n", sub_str);
            free(sub_str);
            sub_str = NULL;
        } else {
            printf("substr() returned NULL.\n\n");
        }

        printf("\n\nPlease press ENTER to continue..");
        // now clear the stdin input buffer
        get_input_from_stdin_and_discard_extra_characters(NULL, 0);
    } // end of while(1) loop

    return EXIT_SUCCESS;
} // end of main
/*
 * get_input_from_stdin_and_discard_extra_characters(char *str, long size):
 *
 * Reads one line from stdin. At most 'size - 1' characters are stored in
 * 'str', followed by a terminating null character ('\0'). Reading stops at
 * the first newline ('\n') or at EOF, even if fewer than 'size - 1'
 * characters have been read; the newline itself is not stored. Any characters
 * of the line that do not fit in 'str' are read and discarded, so the next
 * read from stdin starts on the following line.
 *
 * If 'size' is 0, the whole line is discarded, 'str' is not touched and NULL
 * is returned. So, to discard pending input, call this function with 'str'
 * set to NULL and 'size' set to 0.
 *
 * If 'size' is non-zero and 'str' is NULL, or if 'size' is negative, NULL is
 * returned without reading anything from stdin.
 *
 * A null byte that appears in the input is stored like any other character
 * while filling 'str', and is skipped like any other character while
 * discarding; only '\n' and EOF end a line.
 *
 * Returns 'str' when input was stored in it, NULL otherwise.
 */
char *get_input_from_stdin_and_discard_extra_characters(char *str, long size)
{
    int c = 0;
    long i = 0;

    // If size is 0 then this function will discard all input and return NULL.
    // No need to check str if size is 0.
    if (size == 0) {
        // Discard up to and including the newline. Only '\n' and EOF end the
        // line; a 0 byte must not, or the rest of the line would be left
        // behind for the next read.
        do {
            c = getchar();
        } while ((c != '\n') && (c != EOF));
        return NULL;
    }

    if (!str)
        return NULL;

    if (size < 0)
        return NULL;

    for (i = 0; i < (size - 1); i = i + 1) {
        c = getchar();
        if ((c == '\n') || (c == EOF)) {
            // The line ended before the buffer filled up: nothing to discard.
            str[i] = 0;
            return str;
        }
        str[i] = (char)(c);
    } // end of for loop

    str[i] = 0;

    // The buffer is full: discard the rest of the line (same rule as above).
    do {
        c = getchar();
    } while ((c != '\n') && (c != EOF));

    return str;
} // end of get_input_from_stdin_and_discard_extra_characters
1 Answer 1
Error checks like `if ((!str) || (!*str))` and `if ((start_index < 0) || (end_index < 0) || (end_index < start_index))` etc. don't belong inside a library function. It's the caller's responsibility to do these checks on the caller side. Also, some of these checks are only there because you picked an unsuitable integer type, `long`, instead of an unsigned type. I'd say that the checks after the `strlen` call are OK, though.
If you place the checks inside a library function, then the needless extra branches slow down normal use with correct parameters passed.
`long start_index, long end_index` would be more appropriate as `size_t`, since we are ultimately dealing with indices of an array. Same thing elsewhere in the function: replace `long` with `size_t`. That way you get rid of the casts too.
`memmove` should be `memcpy`, since there is no risk of overlap here and `memcpy` is faster.
There are already very similar functions, `strstr` + `strdup` or `strndup`, where the dup ones are not yet standard C but look like they will get added to the standard in the upcoming C23 version. So there's no obvious need to re-invent the wheel.
`strndup()` has been available on almost all platforms for a long time already, and will be in the C23 standard.