I'm writing a C application for the Networking course at the University. Basically it's a UDP server that can receive two commands:
GET key
SET key value
That are executed against a Hash Table data structure. I'm now writing the code that parses the string into a command struct, and the logic is the following:
- A command cannot be empty and is recognized only if it starts with
GET
orSET
- A key cannot be null and cannot be longer than 100 characters.
- In a
SET
command the data cannot be null
The code seems to work so far, but I'm not so skilled in writing C code, so I'm worried about some hidden bugs or mistakes that I did.
So, if someone can review my code it will be greatly appreciated!
The header file:
#ifndef DUCKY_COMMAND_H
#define DUCKY_COMMAND_H
#include <stddef.h>
#define MAX_KEY_LENGTH 100
#define ERR_COMMAND_NOT_RECOGNIZED -1
#define ERR_KEY_LENGTH -2
#define ERR_NO_KEY -3
#define ERR_NO_DATA -4
typedef struct command {
enum {
GET, SET
} command_type;
char *key;
char *data;
} command;
int parse_command(char *buffer, command *c);
#endif
The function that parses a string into a command struct:
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include "command.h"
int parse_command(char * buffer, command* c) {
int i = 0;
char *delimiter = " ";
char *token = strtok(buffer, delimiter);
char **arr = calloc(3, sizeof(char *)); // I know that a command can have at maximum 3 tokens
// Split the string into an array of strings
while (token != NULL) {
token[strcspn(token, "\n")] = '0円'; // Remove trailing new lines
arr[i] = calloc(1, strlen(token) + 1);
strcpy(arr[i], token);
token = strtok(NULL, delimiter); // Next token
i++;
}
if (strcmp(arr[0], "SET") != 0 && strcmp(arr[0], "GET") != 0) {
return ERR_COMMAND_NOT_RECOGNIZED;
}
// Parse SET command
if (strcmp(arr[0], "SET") == 0) {
if (arr[1] == NULL) {
return ERR_NO_KEY;
}
// Validate key length
if (strlen(arr[1]) > MAX_KEY_LENGTH) {
return ERR_KEY_LENGTH;
}
// Validate data
if (arr[2] == NULL) {
return ERR_NO_DATA;
}
c->command_type = SET;
c->key = arr[1];
c->data = arr[2];
}
// Parse GET command
if (strcmp(arr[0], "GET") == 0) {
if (arr[1] == NULL) {
return ERR_NO_KEY;
}
// Validate key length
if (strlen(arr[1]) > MAX_KEY_LENGTH) {
return ERR_KEY_LENGTH;
}
c->command_type = GET;
c->key = arr[1];
}
return 0;
}
Here are the unit tests (for which I'm using greatest):
#include "../lib/greatest.h"
#include "../src/command.h"
TEST should_parse_a_SET_command_from_a_string(void) {
command c;
char buffer[] = "SET key string";
int result = parse_command(buffer, &c);
ASSERT_EQ(0, result);
ASSERT_EQ(SET, c.command_type);
ASSERT_STR_EQ("key", c.key);
ASSERT_STR_EQ("string", c.data);
PASS();
}
TEST should_return_ERR_NO_KEY_if_the_SET_command_has_not_an_associated_key(void) {
command c;
char buffer[] = "SET";
int result = parse_command(buffer, &c);
ASSERT_EQ(ERR_NO_KEY, result);
PASS();
}
TEST should_return_ERR_NO_DATA_if_the_SET_command_has_no_data_associated(void) {
command c;
char buffer[] = "SET key ";
int result = parse_command(buffer, &c);
ASSERT_EQ(ERR_NO_DATA, result);
PASS();
}
TEST should_parse_a_GET_command_from_a_string(void) {
command c;
char buffer[] = "GET key";
int result = parse_command(buffer, &c);
ASSERT_EQ(0, result);
ASSERT_EQ(GET, c.command_type);
ASSERT_STR_EQ("key", c.key);
PASS();
}
TEST should_return_ERR_NO_KEY_if_the_GET_command_has_not_an_associated_key(void) {
command get;
char buffer[] = "GET";
int result = parse_command(buffer, &get);
ASSERT_EQ(ERR_NO_KEY, result);
PASS();
}
TEST should_return_ERR_KEY_LENGTH_if_the_GET_or_SET_key_length_is_greater_than_100_chars(void) {
int result;
command get, set;
char get_buffer[] = "GET abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdebcde 10 string";
char set_buffer[] = "SET abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdebcde 10 string";
result = parse_command(get_buffer, &get);
ASSERT_EQ(ERR_KEY_LENGTH, result);
result = parse_command(set_buffer, &set);
ASSERT_EQ(ERR_KEY_LENGTH, result);
PASS();
}
TEST should_return_ERR_COMMAND_NOT_RECOGNIZED_if_the_command_is_not_recognized() {
command c;
char buffer[] = "FOO BAR key 10 20";
int result = parse_command(buffer, &c);
ASSERT_EQ(ERR_COMMAND_NOT_RECOGNIZED, result);
PASS();
}
SUITE(suite) {
RUN_TEST(should_parse_a_SET_command_from_a_string);
RUN_TEST(should_return_ERR_NO_KEY_if_the_SET_command_has_not_an_associated_key);
RUN_TEST(should_return_ERR_NO_DATA_if_the_SET_command_has_no_data_associated);
RUN_TEST(should_parse_a_GET_command_from_a_string);
RUN_TEST(should_return_ERR_NO_KEY_if_the_GET_command_has_not_an_associated_key);
RUN_TEST(should_return_ERR_KEY_LENGTH_if_the_GET_or_SET_key_length_is_greater_than_100_chars);
RUN_TEST(should_return_ERR_COMMAND_NOT_RECOGNIZED_if_the_command_is_not_recognized);
}
GREATEST_MAIN_DEFS();
int main(int argc, char *argv[]) {
GREATEST_MAIN_BEGIN();
RUN_SUITE(suite);
GREATEST_MAIN_END();
}
3 Answers 3
You
calloc
arr
, but you know the length of it is always 3. So you can instead allocate it on the stack using C's built in array type. The= {0}
part means that all elements are set to all 0's (NULL
's) at the start, whichcalloc
does for you automatically.char *arr[3] = {0};
When you're splitting the input buffer on spaces, you try and write to
arr
even if you get more than three arguments. This is easily fixed by returning an error ifi == 3
in the loop. (Exercise for the reader: set a flag instead of immediately returning so that if there are other errors, like if the command is unknown, you can return those instead. Or make the errors powers of two and return them OR'ed together, so that you can return multiple errors in one go.)Instead of copying the string inside
buffer
, we can just point to that string inarr
. Note, however, that this means thatbuffer
must outlive the command. If that causes problems, just don't use this modification.I would also make it clearer that
i
is only used inside that while loop, and rewrite it as a for loop.for (int i = 0; token != NULL; i++) { if (i >= 3) return ERR_TOO_MANY_ARGUMENTS; // A new error type for the .h file token[strcspn(token, "\n")] = '0円'; // Remove trailing new lines arr[i] = token; token = strtok(NULL, delimiter); // Next token }
You can replace all instances of
arr[n]
with more readable names by#define
ing macros:#define COMMAND (arr[0]) #define KEY (arr[1]) #define DATA (arr[2])
You can replace
strcmp(...) != 0
withstrcmp(...)
andstrcmp(...) == 0
with!strcmp(...)
. (Don't worry, code with all the modifications will be posted at the end.)I would replace the if-statement for
GET
with anelse
to theSET
if-statement, and thenassert
that the command isGET
. (This makes sure that we don't make a mistake where we let a command through and it falls through both if-statements and does nothing, not modifying the command.)Here's the code in all it's glory:
#include <assert.h> #include <string.h> #include "command.h" int parse_command(char * buffer, command* c) { char *delimiter = " "; char *token = strtok(buffer, delimiter); // We can allocate the char* pointers on the stack, since the length is // constant. char *arr[3] = {0}; // Split the string into an array of strings for (int i = 0; token != NULL; i++) { // We make sure that if there are more than 3 arguments, we // don't overwrite memory after `arr`. if (i >= 3) return ERR_TOO_MANY_ARGUMENTS; token[strcspn(token, "\n")] = '0円'; // Remove trailing new lines // We can store a pointer to `token` (which is inside `buffer`) // instead of copying the string. // Note, however, that this means that `buffer` must outlive // `command`. arr[i] = token; token = strtok(NULL, delimiter); // Next token } // "Magic numbers", like arr[0], should be defined in a // macro or constant with a descriptive name. #define COMMAND (arr[0]) #define KEY (arr[1]) #define DATA (arr[2]) // Redundant comparisons against 0, are, well, redundant. if (strcmp(COMMAND, "SET") && strcmp(COMMAND, "GET")) { return ERR_COMMAND_NOT_RECOGNIZED; } // Parse SET command if (!strcmp(COMMAND, "SET")) { if (KEY == NULL) return ERR_NO_KEY; // Validate key length if (strlen(KEY) > MAX_KEY_LENGTH) { return ERR_KEY_LENGTH; } // Validate data if (DATA == NULL) { return ERR_NO_DATA; } c->command_type = SET; c->key = KEY; c->data = DATA; } else { assert(!strcmp(COMMAND, "GET")); if (KEY == NULL) { return ERR_NO_KEY; } // Validate key length if (strlen(KEY) > MAX_KEY_LENGTH) { return ERR_KEY_LENGTH; } c->command_type = GET; c->key = KEY; } return 0; }
parses a string into a command struct
Your use of testing is very effective, and isolating the parser from the rest of the system is good. However, a parser is difficult to test completely. You could use a parser-generator to have more confidence that your code is correct. Also, it's shorter and faster then building your own from the standard library. re2c has a similar example to parse an IPv4 address; I've successfully replaced your parser with this code.
#include <assert.h>
#include "../src/command.h"
/** Returns whether the command could be parsed. */
int parse_command(char *buffer, struct command *c) {
char *YYCURSOR = buffer, *YYMARKER, *o1, *o2, *o3, *o4;
/*!stags:re2c format = 'char *@@;'; */
assert(buffer && c);
scan:
/*!re2c
// http://re2c.org/manual/manual_c.html#submatch-extraction
re2c:yyfill:enable = 0;
re2c:flags:tags = 1;
re2c:define:YYCTYPE = char;
end = "\x00";
ws = [ \t\v\f\n\r];
string = [^ \t\v\f\n\r\x00]{1,100};
ws* { goto scan; } // skip blank lines and whitespace
"GET" ws+ @o1 string @o2 ws* end {
c->command_type = GET;
c->key = o1, *o2 = '0円';
c->data = 0;
return 1;
}
"SET" ws+ @o1 string @o2 ws+ @o3 string @o4 ws* end {
c->command_type = SET;
c->key = o1, *o2 = '0円';
c->data = o3, *o4 = '0円';
return 1;
}
* { return 0; }
*/
}
I simplified the contract, instead of returning an error code, it returns success. Use re2c -o parse.c parse.c.re
and modify the test programme. This also gets rid of the copying to dynamic memory that should probably be freed.
First off, you could use constants for the command strings:
char COMMAND_SET[] = "SET";
char COMMAND_GET[] = "GET";
The processing of the two commands can be slightly generalized:
if (arr[1] == NULL) {
return ERR_NO_KEY;
}
// Validate key length
if (strlen(arr[1]) > MAX_KEY_LENGTH) {
return ERR_KEY_LENGTH;
}
c->key = arr[1];
// Parse SET command
if (strcmp(arr[0], "SET") == 0) {
// Validate data
if (arr[2] == NULL) {
return ERR_NO_DATA;
}
c->command_type = SET;
c->data = arr[2];
}
// Parse GET command
if (strcmp(arr[0], "GET") == 0) {
c->command_type = GET;
}
I don't have a lot of experience with unit testing in C, so unfortunately I can't properly comment on that.
-
4\$\begingroup\$ Welcome to code review. I'm not sure of the value of using symbolic constants is in this case, since "SET" and "GET" are by default constants. \$\endgroup\$2020年07月14日 12:22:15 +00:00Commented Jul 14, 2020 at 12:22
-
\$\begingroup\$ Hi, Thank you so much for your response! In regards to code safety, is it fine? For example the usage of strcmp, or or calloc? Have I some bufferoverflow or other subtle bug around? \$\endgroup\$gabrieledarrigo– gabrieledarrigo2020年07月14日 16:47:01 +00:00Commented Jul 14, 2020 at 16:47