Parse a contrived message protocol with first byte data length followed by data

Question 1

I think this code to parse a contrived message protocol with first byte data length followed by data is a little ugly. Has anyone got any suggestions on how to make it more elegant and more robust?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LENGTHBYTES 1
void splitmessages(unsigned char* bstream, int length);
void printmsg(unsigned char* msg);
int main() {
 unsigned char stream[] = {2, 'A', 'A', 1, 'B', 3, 'C', 'C', 'C', 1, 'D' };
 int len = sizeof(stream) / sizeof(stream[0]);
 printf("\nTest 1 - four messages together\n");
 splitmessages(stream, len); //works
 printf("\nTest 2 - two complete msgs, then 2 complete msgs\n");
 splitmessages(stream, len-2); //works
 splitmessages(stream+9, 2);
 //split across a message - in data
 printf("\nTest 3 - messages split across data part\n");
 splitmessages(stream, len-4); //works
 splitmessages(stream+7, 4);
 //split across a message - just after size
 printf("\nTest 4 - messages split just after size part\n");
 splitmessages(stream, len-5); 
 splitmessages(stream+6, 5);
 //split across a message - just before size
 printf("\nTest 5 - messages split just before size part\n");
 splitmessages(stream, len-6); 
 splitmessages(stream+5, 6);
 return 0;
}
void splitmessages(unsigned char* bstream, int length) {
 //Create msg as soon as you have size and save as static
 static unsigned char* msg = 0;
 static int tempbufposition = 0;
 static int tocompletemsg = 0;
 enum STAGE { SIZE, DATA };
 STAGE stage = SIZE;
 int size = 0;
 //adjust stage to DATA if already have start of msg
 if (tocompletemsg != 0) 
 stage = DATA;
 //get cached size
 if(tempbufposition != 0 && msg) 
 size = msg[0];
 for(int i=0; i<length; ++i){
 switch(stage) {
 case SIZE:
 size = bstream[i];
 if(msg) {
 free(msg);
 msg = 0;
 tempbufposition = 0;
 tocompletemsg = 0;
 }
 //re-create complete msg so add 1 byte for size
 msg = (unsigned char*)malloc(size+LENGTHBYTES); 
 msg[0] = size;
 tempbufposition = 1;
 tocompletemsg = size;
 stage = DATA;
 break;
 case DATA:
 //do we have enough data for a complete message
 if(i + tocompletemsg <= length) {
 memcpy(msg+tempbufposition, &bstream[i], tocompletemsg);
 printmsg(msg);
 stage = SIZE;
 i += tocompletemsg-1;
 tocompletemsg = 0;
 tempbufposition = 0;
 } else {
 //add available data
 memcpy(msg+tempbufposition, &bstream[i], length-i);
 tempbufposition +=(length-i);
 tocompletemsg -= (length-i);
 i += (length-i)-1;
 }
 break;
 }
 } 
}
void printmsg(unsigned char* msg) {
 if(msg && *msg) {
 int size = msg[0];
 printf("%d ", msg[0]);
 for(int i = 1; i <= size; ++i)
 printf("%c ", msg[i]);
 printf("\n");
 }
}

Question 2

Below is an alternative example of how messages could be extracted. Instead of a state machine with saved-state inside your splitmessages function, I have moved the saved-state out of the function into a structure that is passed-in. I have also checked for unexpected data at the start of the function. The return value is 0 on failure or the number of bytes consumed from the input stream on success. I didn't check for size bytes occurring in the wrong place - replace the memcpy with a function that checks the bytes copied for that.

Note that your protocol has to distinguish between data bytes and size bytes. I have arbitrarily made the boundary 32, the first printable char.

#include <string.h>
#include <assert.h>
#define DATA_MIN_VALUE 32
#define DATA_MAX_SIZE (DATA_MIN_VALUE)
struct message {
 size_t size;
 size_t remaining;
 char data[DATA_MAX_SIZE];
}; 
typedef struct message Message;
static inline size_t min(size_t a, size_t b)
{
 return a < b ? a : b;
}
static inline int is_data(unsigned char ch)
{
 return ch >= DATA_MIN_VALUE;
}
static size_t
get_message(Message *msg, const unsigned char *bstream, size_t length)
{
 size_t len;
 assert(length);
 if ((msg->remaining == 0) && is_data(bstream[0])) {
 /* Data instead of message; caller must handle - eg. discard data up
 * to next message */
 return 0;
 }
 if (is_data(bstream[0])) {
 msg->size = 0;
 msg->remaining = bstream[0];
 }
 len = min(length-1, msg->remaining);
 memcpy(&msg->data[msg->size], &bstream[1], len);
 msg->size += len;
 msg->remaining -= len;
 return len + 1;
}

The code should compile but is untested. Hope it helps to have another perspective.

William Morris William Morris 9,38219 silver badges42 bronze badges · Accepted Answer · 2012-10-13 02:26:46Z

Below is an alternative example of how messages could be extracted. Instead of a state machine with saved-state inside your splitmessages function, I have moved the saved-state out of the function into a structure that is passed-in. I have also checked for unexpected data at the start of the function. The return value is 0 on failure or the number of bytes consumed from the input stream on success. I didn't check for size bytes occurring in the wrong place - replace the memcpy with a function that checks the bytes copied for that.

Note that your protocol has to distinguish between data bytes and size bytes. I have arbitrarily made the boundary 32, the first printable char.

#include <string.h>
#include <assert.h>
#define DATA_MIN_VALUE 32
#define DATA_MAX_SIZE (DATA_MIN_VALUE)
struct message {
 size_t size;
 size_t remaining;
 char data[DATA_MAX_SIZE];
}; 
typedef struct message Message;
static inline size_t min(size_t a, size_t b)
{
 return a < b ? a : b;
}
static inline int is_data(unsigned char ch)
{
 return ch >= DATA_MIN_VALUE;
}
static size_t
get_message(Message *msg, const unsigned char *bstream, size_t length)
{
 size_t len;
 assert(length);
 if ((msg->remaining == 0) && is_data(bstream[0])) {
 /* Data instead of message; caller must handle - eg. discard data up
 * to next message */
 return 0;
 }
 if (is_data(bstream[0])) {
 msg->size = 0;
 msg->remaining = bstream[0];
 }
 len = min(length-1, msg->remaining);
 memcpy(&msg->data[msg->size], &bstream[1], len);
 msg->size += len;
 msg->remaining -= len;
 return len + 1;
}

The code should compile but is untested. Hope it helps to have another perspective.

Stack Exchange Network

Parse a contrived message protocol with first byte data length followed by data

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Parse a contrived message protocol with first byte data length followed by data

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions