I think this code to parse a contrived message protocol with first byte data length followed by data is a little ugly. Has anyone got any suggestions on how to make it more elegant and more robust?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LENGTHBYTES 1
void splitmessages(unsigned char* bstream, int length);
void printmsg(unsigned char* msg);
int main() {
unsigned char stream[] = {2, 'A', 'A', 1, 'B', 3, 'C', 'C', 'C', 1, 'D' };
int len = sizeof(stream) / sizeof(stream[0]);
printf("\nTest 1 - four messages together\n");
splitmessages(stream, len); //works
printf("\nTest 2 - two complete msgs, then 2 complete msgs\n");
splitmessages(stream, len-2); //works
splitmessages(stream+9, 2);
//split across a message - in data
printf("\nTest 3 - messages split across data part\n");
splitmessages(stream, len-4); //works
splitmessages(stream+7, 4);
//split across a message - just after size
printf("\nTest 4 - messages split just after size part\n");
splitmessages(stream, len-5);
splitmessages(stream+6, 5);
//split across a message - just before size
printf("\nTest 5 - messages split just before size part\n");
splitmessages(stream, len-6);
splitmessages(stream+5, 6);
return 0;
}
void splitmessages(unsigned char* bstream, int length) {
//Create msg as soon as you have size and save as static
static unsigned char* msg = 0;
static int tempbufposition = 0;
static int tocompletemsg = 0;
enum STAGE { SIZE, DATA };
STAGE stage = SIZE;
int size = 0;
//adjust stage to DATA if already have start of msg
if (tocompletemsg != 0)
stage = DATA;
//get cached size
if(tempbufposition != 0 && msg)
size = msg[0];
for(int i=0; i<length; ++i){
switch(stage) {
case SIZE:
size = bstream[i];
if(msg) {
free(msg);
msg = 0;
tempbufposition = 0;
tocompletemsg = 0;
}
//re-create complete msg so add 1 byte for size
msg = (unsigned char*)malloc(size+LENGTHBYTES);
msg[0] = size;
tempbufposition = 1;
tocompletemsg = size;
stage = DATA;
break;
case DATA:
//do we have enough data for a complete message
if(i + tocompletemsg <= length) {
memcpy(msg+tempbufposition, &bstream[i], tocompletemsg);
printmsg(msg);
stage = SIZE;
i += tocompletemsg-1;
tocompletemsg = 0;
tempbufposition = 0;
} else {
//add available data
memcpy(msg+tempbufposition, &bstream[i], length-i);
tempbufposition +=(length-i);
tocompletemsg -= (length-i);
i += (length-i)-1;
}
break;
}
}
}
void printmsg(unsigned char* msg) {
if(msg && *msg) {
int size = msg[0];
printf("%d ", msg[0]);
for(int i = 1; i <= size; ++i)
printf("%c ", msg[i]);
printf("\n");
}
}
1 Answer 1
Below is an alternative example of how messages could be extracted. Instead of a state machine with saved-state inside your splitmessages function, I have moved the saved-state out of the function into a structure that is passed-in. I have also checked for unexpected data at the start of the function. The return value is 0 on failure or the number of bytes consumed from the input stream on success. I didn't check for size bytes occurring in the wrong place - replace the memcpy with a function that checks the bytes copied for that.
Note that your protocol has to distinguish between data bytes and size bytes. I have arbitrarily made the boundary 32, the first printable char.
#include <string.h>
#include <assert.h>
#define DATA_MIN_VALUE 32
#define DATA_MAX_SIZE (DATA_MIN_VALUE)
struct message {
size_t size;
size_t remaining;
char data[DATA_MAX_SIZE];
};
typedef struct message Message;
static inline size_t min(size_t a, size_t b)
{
return a < b ? a : b;
}
static inline int is_data(unsigned char ch)
{
return ch >= DATA_MIN_VALUE;
}
static size_t
get_message(Message *msg, const unsigned char *bstream, size_t length)
{
size_t len;
assert(length);
if ((msg->remaining == 0) && is_data(bstream[0])) {
/* Data instead of message; caller must handle - eg. discard data up
* to next message */
return 0;
}
if (is_data(bstream[0])) {
msg->size = 0;
msg->remaining = bstream[0];
}
len = min(length-1, msg->remaining);
memcpy(&msg->data[msg->size], &bstream[1], len);
msg->size += len;
msg->remaining -= len;
return len + 1;
}
The code should compile but is untested. Hope it helps to have another perspective.