3
\$\begingroup\$

I think this code to parse a contrived message protocol with first byte data length followed by data is a little ugly. Has anyone got any suggestions on how to make it more elegant and more robust?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LENGTHBYTES 1
void splitmessages(unsigned char* bstream, int length);
void printmsg(unsigned char* msg);
int main() {
 unsigned char stream[] = {2, 'A', 'A', 1, 'B', 3, 'C', 'C', 'C', 1, 'D' };
 int len = sizeof(stream) / sizeof(stream[0]);
 printf("\nTest 1 - four messages together\n");
 splitmessages(stream, len); //works
 printf("\nTest 2 - two complete msgs, then 2 complete msgs\n");
 splitmessages(stream, len-2); //works
 splitmessages(stream+9, 2);
 //split across a message - in data
 printf("\nTest 3 - messages split across data part\n");
 splitmessages(stream, len-4); //works
 splitmessages(stream+7, 4);
 //split across a message - just after size
 printf("\nTest 4 - messages split just after size part\n");
 splitmessages(stream, len-5); 
 splitmessages(stream+6, 5);
 //split across a message - just before size
 printf("\nTest 5 - messages split just before size part\n");
 splitmessages(stream, len-6); 
 splitmessages(stream+5, 6);
 return 0;
}
void splitmessages(unsigned char* bstream, int length) {
 //Create msg as soon as you have size and save as static
 static unsigned char* msg = 0;
 static int tempbufposition = 0;
 static int tocompletemsg = 0;
 enum STAGE { SIZE, DATA };
 STAGE stage = SIZE;
 int size = 0;
 //adjust stage to DATA if already have start of msg
 if (tocompletemsg != 0) 
 stage = DATA;
 //get cached size
 if(tempbufposition != 0 && msg) 
 size = msg[0];
 for(int i=0; i<length; ++i){
 switch(stage) {
 case SIZE:
 size = bstream[i];
 if(msg) {
 free(msg);
 msg = 0;
 tempbufposition = 0;
 tocompletemsg = 0;
 }
 //re-create complete msg so add 1 byte for size
 msg = (unsigned char*)malloc(size+LENGTHBYTES); 
 msg[0] = size;
 tempbufposition = 1;
 tocompletemsg = size;
 stage = DATA;
 break;
 case DATA:
 //do we have enough data for a complete message
 if(i + tocompletemsg <= length) {
 memcpy(msg+tempbufposition, &bstream[i], tocompletemsg);
 printmsg(msg);
 stage = SIZE;
 i += tocompletemsg-1;
 tocompletemsg = 0;
 tempbufposition = 0;
 } else {
 //add available data
 memcpy(msg+tempbufposition, &bstream[i], length-i);
 tempbufposition +=(length-i);
 tocompletemsg -= (length-i);
 i += (length-i)-1;
 }
 break;
 }
 } 
}
void printmsg(unsigned char* msg) {
 if(msg && *msg) {
 int size = msg[0];
 printf("%d ", msg[0]);
 for(int i = 1; i <= size; ++i)
 printf("%c ", msg[i]);
 printf("\n");
 }
}
Jamal
35.2k13 gold badges134 silver badges238 bronze badges
asked Oct 6, 2012 at 15:55
\$\endgroup\$

1 Answer 1

2
\$\begingroup\$

Below is an alternative example of how messages could be extracted. Instead of a state machine with saved-state inside your splitmessages function, I have moved the saved-state out of the function into a structure that is passed-in. I have also checked for unexpected data at the start of the function. The return value is 0 on failure or the number of bytes consumed from the input stream on success. I didn't check for size bytes occurring in the wrong place - replace the memcpy with a function that checks the bytes copied for that.

Note that your protocol has to distinguish between data bytes and size bytes. I have arbitrarily made the boundary 32, the first printable char.

#include <string.h>
#include <assert.h>
#define DATA_MIN_VALUE 32
#define DATA_MAX_SIZE (DATA_MIN_VALUE)
struct message {
 size_t size;
 size_t remaining;
 char data[DATA_MAX_SIZE];
}; 
typedef struct message Message;
static inline size_t min(size_t a, size_t b)
{
 return a < b ? a : b;
}
static inline int is_data(unsigned char ch)
{
 return ch >= DATA_MIN_VALUE;
}
static size_t
get_message(Message *msg, const unsigned char *bstream, size_t length)
{
 size_t len;
 assert(length);
 if ((msg->remaining == 0) && is_data(bstream[0])) {
 /* Data instead of message; caller must handle - eg. discard data up
 * to next message */
 return 0;
 }
 if (is_data(bstream[0])) {
 msg->size = 0;
 msg->remaining = bstream[0];
 }
 len = min(length-1, msg->remaining);
 memcpy(&msg->data[msg->size], &bstream[1], len);
 msg->size += len;
 msg->remaining -= len;
 return len + 1;
}

The code should compile but is untested. Hope it helps to have another perspective.

answered Oct 13, 2012 at 2:26
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.