TLV implementation in C++

Question 1

I tried implementing TLV (https://en.wikipedia.org/wiki/Type-length-value). Actually, my implementation is more than plain TLV because it supports an additional "Tag" element (which you can use to name a particular TLV object). My goal is to serialize an array of TLV objects to a byte array and vice versa (I haven't implemented this yet). For now I have written a class which handles a single TLV and its serialization/deserialization. If this class is OK, I believe it will be much easier to extend it to arrays. Here is the implementation. Feel free to comment.

Btw:

the class assumes little endian encoding for integers
and no unicode strings are supported

P.S. So basically a single TLV object is a data structure which holds: a value that is either a numeric type, a string, or a byte array; a type variable indicating which kind of data it is; and a tag name indicating what the value represents, e.g. "phone number".

.cpp file:

#include "stdafx.h"
#include "TLVObject.h"
// Default constructor: delegates to clearMembers() so the object starts out
// with type UNDEFINED, an empty tag and zeroed/empty scalar and string values.
TLVObject::TLVObject()
{
 this->clearMembers();
}
// Destructor: intentionally empty; the string and vector members release
// their own storage.
TLVObject::~TLVObject()
{
}
//////////////////////////////////////
// Integers to byte arrays
vector<uint8_t> TLVObject::UINT16ToByteArrayLE(uint16_t paramInt)
{
 vector<uint8_t> arrayOfByte(2);
 for (int i = 0; i < 2; i++)
 arrayOfByte[i] = ((paramInt >> (i * 8)) & 0xFF);
 return arrayOfByte;
}
vector<uint8_t> TLVObject::UINT32ToByteArrayLE(uint32_t paramInt)
{
 vector<uint8_t> arrayOfByte(4);
 for (int i = 0; i < 4; i++)
 arrayOfByte[i] = ((paramInt >> (i * 8)) & 0xFF);
 return arrayOfByte;
}
vector<uint8_t> TLVObject::UINT64ToByteArrayLE(uint64_t paramInt)
{
 vector<uint8_t> arrayOfByte(8);
 for (int i = 0; i < 8; i++)
 arrayOfByte[i] = ((paramInt >> (i * 8)) & 0xFF);
 return arrayOfByte;
}
/////////////////////////////////////////////////
// Byte array to integers
// Decodes a 2 byte little endian array (value[0] is the low byte) into a
// 16 bit unsigned integer.
//
// Throws std::runtime_error when `value` is not exactly 2 bytes long.
// std::runtime_error is used because std::exception has no standard
// constructor taking a message; it still derives from std::exception, so
// existing handlers keep working.
uint16_t TLVObject::LEToUINT16(std::vector<uint8_t> value)
{
 if (value.size() != 2)
  throw std::runtime_error("LEToUINT16: Wrong size");
 return static_cast<uint16_t>((static_cast<uint16_t>(value[1]) << 8) | value[0]);
}
// Decodes a 4 byte little endian array (value[0] is the low byte) into a
// 32 bit unsigned integer.
//
// Throws std::runtime_error when `value` is not exactly 4 bytes long.
// std::runtime_error is used because std::exception has no standard
// constructor taking a message; it still derives from std::exception, so
// existing handlers keep working.
uint32_t TLVObject::LEToUINT32(std::vector<uint8_t> value)
{
 if (value.size() != 4)
  throw std::runtime_error("LEToUINT32: Wrong size");
 // Each byte is widened before shifting so the high byte cannot overflow int.
 return (static_cast<uint32_t>(value[3]) << 24) |
        (static_cast<uint32_t>(value[2]) << 16) |
        (static_cast<uint32_t>(value[1]) << 8) |
        static_cast<uint32_t>(value[0]);
}
// Decodes an 8 byte little endian array (value[0] is the low byte) into a
// 64 bit unsigned integer.
//
// Throws std::runtime_error when `value` is not exactly 8 bytes long.
// std::runtime_error is used because std::exception has no standard
// constructor taking a message; it still derives from std::exception, so
// existing handlers keep working.
uint64_t TLVObject::LEToUINT64(std::vector<uint8_t> value)
{
 if (value.size() != 8)
  throw std::runtime_error("LEToUINT64: Wrong size");
 uint64_t result = 0;
 // Fold from the most significant byte down so each step shifts the
 // accumulated value left by one byte.
 for (size_t i = value.size(); i-- > 0; )
  result = (result << 8) | static_cast<uint64_t>(value[i]);
 return result;
}
///////////////////////////////////////////////
// Serialization and Deserialization methods
vector<uint8_t> TLVObject::Serialize()
{
 vector<uint8_t> tmp;
 // Serialize type first
 tmp.push_back((uint8_t)m_tlvType);
 // Serialize tag name length (which is 2 bytes max) as little endian byte array
 uint16_t tagLen = (uint16_t)m_tagName.length();
 tmp.push_back(UINT16ToByteArrayLE(tagLen).at(0));
 tmp.push_back(UINT16ToByteArrayLE(tagLen).at(1));
 // Now serialize tag name itself
 for(int i = 0; i < tagLen; i++)
 {
 tmp.push_back((uint8_t)m_tagName.at(i));
 }
 // Data size 4 byte integer.
 uint32_t dataSize = 0;
 if(m_tlvType == BLOB)
 {
 dataSize = m_blob.size();
 // Serialize size
 vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
 tmp.push_back(tmp1.at(0));
 tmp.push_back(tmp1.at(1));
 tmp.push_back(tmp1.at(2));
 tmp.push_back(tmp1.at(3));
 // Now, serialize the blob.
 for(uint32_t i = 0; i<dataSize; i++)
 tmp.push_back(m_blob[i]);
 }else if(m_tlvType == STRING)
 {
 dataSize = m_strValue.length();
 // Serialize size
 vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
 tmp.push_back(tmp1.at(0));
 tmp.push_back(tmp1.at(1));
 tmp.push_back(tmp1.at(2));
 tmp.push_back(tmp1.at(3));
 // Now, serialize the string.
 for(uint32_t i = 0; i<dataSize; i++)
 tmp.push_back(m_strValue[i]);
 }else if(m_tlvType == BYTE)
 {
 dataSize = 1;
 // Serialize size
 vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
 tmp.push_back(tmp1.at(0));
 tmp.push_back(tmp1.at(1));
 tmp.push_back(tmp1.at(2));
 tmp.push_back(tmp1.at(3));
 // Serialize the byte.
 tmp.push_back(m_BYTE);
 }else if(m_tlvType == WORD)
 {
 dataSize = 2;
 // Serialize size
 vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
 tmp.push_back(tmp1.at(0));
 tmp.push_back(tmp1.at(1));
 tmp.push_back(tmp1.at(2));
 tmp.push_back(tmp1.at(3));
 // Now, serialize the WORD value.
 tmp.push_back(UINT16ToByteArrayLE(m_WORD).at(0));
 tmp.push_back(UINT16ToByteArrayLE(m_WORD).at(1));
 }else if(m_tlvType == DWORD)
 {
 dataSize = 4;
 // Serialize size
 vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
 tmp.push_back(tmp1.at(0));
 tmp.push_back(tmp1.at(1));
 tmp.push_back(tmp1.at(2));
 tmp.push_back(tmp1.at(3));
 // Now, serialize the unsigned integer.
 vector<uint8_t> tmp2 = UINT32ToByteArrayLE(m_DWORD);
 tmp.push_back(tmp2.at(0));
 tmp.push_back(tmp2.at(1));
 tmp.push_back(tmp2.at(2));
 tmp.push_back(tmp2.at(3));
 }else if(m_tlvType == QWORD)
 {
 dataSize = 8;
 // Serialize size
 vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
 tmp.push_back(tmp1.at(0));
 tmp.push_back(tmp1.at(1));
 tmp.push_back(tmp1.at(2));
 tmp.push_back(tmp1.at(3));
 // Serialize data
 vector<uint8_t> tmp2 = UINT64ToByteArrayLE(m_QWORD);
 tmp.push_back(tmp2.at(0));
 tmp.push_back(tmp2.at(1));
 tmp.push_back(tmp2.at(2));
 tmp.push_back(tmp2.at(3));
 tmp.push_back(tmp2.at(4));
 tmp.push_back(tmp2.at(5));
 tmp.push_back(tmp2.at(6));
 tmp.push_back(tmp2.at(7));
 }
 return tmp;
}
void TLVObject::Deserialize(vector<uint8_t> value)
{
 // Get type
 TLV_TYPE tlvType = static_cast<TLV_TYPE>(value.at(0));
 // Get tag name size.
 vector<uint8_t> tmp;
 tmp.push_back(value.at(1));
 tmp.push_back(value.at(2));
 uint16_t tagLength = LEToUINT16(tmp);
 if(tagLength > MAXTAGLENGTH)
 throw exception("Tag name length exceeded");
 // Now get tag name
 std::string tagName;
 for(int i = 0; i<tagLength; i++)
 tagName += value.at(3 + i); // Use an offset.
 if(tlvType == STRING)
 {
 // Get value size
 vector<uint8_t> tmp;
 tmp.push_back(value.at(3 + tagLength + 0));
 tmp.push_back(value.at(3 + tagLength + 1));
 tmp.push_back(value.at(3 + tagLength + 2));
 tmp.push_back(value.at(3 + tagLength + 3));
 uint32_t valueSize = LEToUINT32(tmp);
 clearMembers();
 // Get value
 m_strValue="";
 for(uint32_t i = 0; i<valueSize; i++)
 m_strValue += value.at(3 + tagLength + 4 + i);
 // Assign other values too if we got this far.
 m_tlvType = tlvType;
 m_tagName = tagName;
 }else if (tlvType == BLOB)
 {
 // Get value size
 vector<uint8_t> tmp;
 tmp.push_back(value.at(3 + tagLength + 0));
 tmp.push_back(value.at(3 + tagLength + 1));
 tmp.push_back(value.at(3 + tagLength + 2));
 tmp.push_back(value.at(3 + tagLength + 3));
 uint32_t valueSize = LEToUINT32(tmp);
 clearMembers();
 // Get value
 m_blob.clear();
 for(uint32_t i = 0; i<valueSize; i++)
 m_blob.push_back(value.at(3 + tagLength + 4 + i));
 // Assign other values too if we got this far.
 m_tlvType = tlvType;
 m_tagName = tagName;
 }else if (tlvType == BYTE)
 {
 // Get value size
 vector<uint8_t> tmp;
 tmp.push_back(value.at(3 + tagLength + 0));
 tmp.push_back(value.at(3 + tagLength + 1));
 tmp.push_back(value.at(3 + tagLength + 2));
 tmp.push_back(value.at(3 + tagLength + 3));
 uint32_t valueSize = LEToUINT32(tmp);
 clearMembers();
 // Get value
 m_BYTE = value.at(3 + tagLength + 4);
 // Assign other values too if we got this far.
 m_tlvType = tlvType;
 m_tagName = tagName;
 } else if (tlvType == WORD)
 {
 // Get value size
 vector<uint8_t> tmp;
 tmp.push_back(value.at(3 + tagLength + 0));
 tmp.push_back(value.at(3 + tagLength + 1));
 tmp.push_back(value.at(3 + tagLength + 2));
 tmp.push_back(value.at(3 + tagLength + 3));
 uint32_t valueSize = LEToUINT32(tmp);
 // Get byte array represenginf the WORD
 vector<uint8_t> tmp2;
 for(int i = 0; i<2; i++)
 tmp2.push_back(value.at(3 + tagLength + 4 + i));
 clearMembers();
 // Store word
 m_WORD = LEToUINT16(tmp2);
 // Assign other values too if we got this far.
 m_tlvType = tlvType;
 m_tagName = tagName;
 }else if (tlvType == DWORD)
 {
 // Get value size
 vector<uint8_t> tmp;
 tmp.push_back(value.at(3 + tagLength + 0));
 tmp.push_back(value.at(3 + tagLength + 1));
 tmp.push_back(value.at(3 + tagLength + 2));
 tmp.push_back(value.at(3 + tagLength + 3));
 uint32_t valueSize = LEToUINT32(tmp);
 // Get byte array represenginf the WORD
 vector<uint8_t> tmp2;
 for(int i = 0; i<4; i++)
 tmp2.push_back(value.at(3 + tagLength + 4 + i));
 clearMembers();
 // Store work
 m_DWORD = LEToUINT32(tmp2);
 // Assign other values too if we got this far.
 m_tlvType = tlvType;
 m_tagName = tagName;
 }else if (tlvType == QWORD)
 {
 // Get value size
 vector<uint8_t> tmp;
 tmp.push_back(value.at(3 + tagLength + 0));
 tmp.push_back(value.at(3 + tagLength + 1));
 tmp.push_back(value.at(3 + tagLength + 2));
 tmp.push_back(value.at(3 + tagLength + 3));
 uint32_t valueSize = LEToUINT32(tmp);
 // Get byte array represenginf the WORD
 vector<uint8_t> tmp2;
 for(int i = 0; i<8; i++)
 tmp2.push_back(value.at(3 + tagLength + 4 + i));
 clearMembers();
 // Store work
 m_QWORD = LEToUINT64(tmp2);
 // Assign other values too if we got this far.
 m_tlvType = tlvType;
 m_tagName = tagName;
 }
}

header:

#pragma once
#include <string.h>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>
// Upper bound on the length of a tag name. It is enforced by the value
// constructors and by Deserialize(), and fits the 2 byte tag length field
// written by Serialize().
#define MAXTAGLENGTH 20000
// NOTE(review): a file-scope using-directive in a header leaks into every
// includer. It is kept because the accompanying .cpp spells `vector`
// unqualified and relies on it.
using namespace std;
// Kind of value a TLVObject holds. The numeric value is written as the first
// byte of the serialized form, so the enumerators must not be reordered.
enum TLV_TYPE {UNDEFINED = 0, BYTE, WORD, DWORD, QWORD, STRING, BLOB};

// A single tagged type-length-value record.
//
// An object holds a tag name, a type, and exactly one meaningful value: an
// 8/16/32/64 bit unsigned integer, a string, or a byte blob. Only the member
// that matches the type is meaningful; the typed getters throw
// std::runtime_error (a std::exception) when asked for a different kind.
class TLVObject
{
private:
 TLV_TYPE m_tlvType; // TLV type
 std::string m_tagName; // TLV tag name; tag name max length is MAXTAGLENGTH
 // Possible TLV values:
 // TLV data size is 4 byte unsigned integer max.
 uint8_t m_BYTE;
 uint16_t m_WORD;
 uint32_t m_DWORD;
 uint64_t m_QWORD;
 std::string m_strValue;
 std::vector<uint8_t> m_blob;

 // Resets every member, including the blob, to its empty/zero state.
 void clearMembers()
 {
  m_tlvType = UNDEFINED;
  m_tagName.clear();
  m_BYTE = 0;
  m_WORD = 0;
  m_DWORD = 0;
  m_QWORD = 0;
  m_strValue.clear();
  m_blob.clear();
 }
 // Shared constructor prologue: validates the tag, zeroes every value member
 // so none is left uninitialised, then records the type and tag.
 void initHeader(TLV_TYPE type, const std::string& tag)
 {
  if (tag.length() > MAXTAGLENGTH)
   throw std::runtime_error("Tag name length exceeded");
  clearMembers();
  m_tlvType = type;
  m_tagName = tag;
 }
 // Throws std::runtime_error carrying `message` unless the object currently
 // holds a value of type `expected`.
 void requireType(TLV_TYPE expected, const char* message) const
 {
  if (m_tlvType != expected)
   throw std::runtime_error(message);
 }
 // Helpers
 // These work exclusively assuming little endian encoding
 std::vector<uint8_t> UINT16ToByteArrayLE(uint16_t paramInt);
 std::vector<uint8_t> UINT32ToByteArrayLE(uint32_t paramInt);
 std::vector<uint8_t> UINT64ToByteArrayLE(uint64_t paramInt);
 uint16_t LEToUINT16(std::vector<uint8_t> value);
 uint32_t LEToUINT32(std::vector<uint8_t> value);
 uint64_t LEToUINT64(std::vector<uint8_t> value);
public:
 // Encodes this object as a byte array (defined in the implementation file).
 std::vector<uint8_t> Serialize();
 // Replaces this object's state with the record encoded in `value`
 // (defined in the implementation file).
 void Deserialize(std::vector<uint8_t> value);

 // Value constructors: each stores `tag` plus one typed value.
 // All throw std::runtime_error when `tag` is longer than MAXTAGLENGTH.
 TLVObject(std::string tag, std::vector<uint8_t> blob)
 {
  initHeader(BLOB, tag);
  m_blob = blob;
 }
 TLVObject(std::string tag, uint8_t value)
 {
  initHeader(BYTE, tag);
  m_BYTE = value;
 }
 TLVObject(std::string tag, uint16_t value)
 {
  initHeader(WORD, tag);
  m_WORD = value;
 }
 TLVObject(std::string tag, uint32_t value)
 {
  initHeader(DWORD, tag);
  m_DWORD = value;
 }
 TLVObject(std::string tag, uint64_t value)
 {
  initHeader(QWORD, tag);
  m_QWORD = value;
 }
 TLVObject(std::string tag, std::string value)
 {
  initHeader(STRING, tag);
  m_strValue = value;
 }

 // Typed accessors: each returns the stored value, or throws
 // std::runtime_error when the object holds a different type.
 std::string GetStringValue() const
 {
  requireType(STRING, "This is not string TLV");
  return m_strValue;
 }
 std::vector<uint8_t> GetBlobValue() const
 {
  requireType(BLOB, "This is not BLOB TLV");
  return m_blob;
 }
 uint8_t GetBYTEValue() const
 {
  requireType(BYTE, "This is not BYTE TLV");
  return m_BYTE;
 }
 uint16_t GetUINT16Value() const
 {
  requireType(WORD, "This is not WORD TLV");
  return m_WORD;
 }
 uint32_t GetUINT32Value() const
 {
  requireType(DWORD, "This is not DWORD TLV");
  return m_DWORD;
 }
 uint64_t GetUINT64Value() const
 {
  requireType(QWORD, "This is not QWORD TLV");
  return m_QWORD;
 }
 // Returns the type of the value currently stored.
 TLV_TYPE GetType() const
 {
  return m_tlvType;
 }
 // Default constructor and destructor are defined in the implementation file.
 TLVObject();
 ~TLVObject();
};

Test program for single TLV

 // Round trip a single TLV: build a WORD-typed record tagged "Name" (the cast
 // selects the uint16_t constructor), encode it, then decode it into a fresh
 // default-constructed object.
 TLVObject obj("Name", (uint16_t)255);
 vector<uint8_t> tmp = obj.Serialize(); // Serialize as byte array
 TLVObject obj1;
 obj1.Deserialize(tmp); // Reconstruct TLV from byte array

Question 2

I see a number of things that may help you improve your code.

Don't abuse `using namespace std`

Putting using namespace std at the top of every program is a bad habit that you'd do well to avoid. It is particularly bad to put it into a header file, so please don't do that. Instead, simply specify the namespace where you use it, so cout becomes std::cout, endl becomes std::endl and so forth.

Use all required `#include`s

The code uses std::string and appears to use std::exception but doesn't include the corresponding headers. The code should have:

#include <string>
#include <stdexcept>

And not

#include <string.h>

While the code may compile as you have it, it's solely due to the implementation details of the particular compiler and library you are using. It can't be relied on and can (and does) change from version to version of even a single compiler on a single platform. You can use a reference such as http://en.cppreference.com to make sure you know which standard header to include for each standard function.

Use polymorphism

The current design has each TLVObject having one of each possible interpretation embedded within it. A more C++-style design would be to have a single base class and then derive the particular types from that base class. So for example, you might still have TLVObject as the base class, but then derive TLVByte, TLVWord, etc. from that. Not only would that make the interface cleaner, but it would simplify adding new types.

Consider ASN.1

There is a standard called ASN.1 that already describes ways of encoding many of these types. Instead of re-inventing your own tags, you may find it useful to use the ones defined by ASN.1.

Use const where practical

Methods that do not modify the underlying object should be declared const, for example: TLV_TYPE GetType() const { return m_tlvType; }

Pass by const reference

Several places in the code, such as TLVObject::LEToUINT64(vector<uint8_t> value) take their parameters by value. Instead, they could likely be made more efficient by passing a const reference, as in:

TLVObject::LEToUINT64(const vector<uint8_t> &value)

This avoids copying into a new vector and signals the compiler that the passed value will not be altered.

Simplify the code

There are some overly complex pieces of code in this. For example, we have this:

// Get tag name size.
vector<uint8_t> tmp;
tmp.push_back(value.at(1));
tmp.push_back(value.at(2));
uint16_t tagLength = LEToUINT16(tmp);

That would be shorter, faster, and more portable if written like this instead:

uint16_t tagLength = value.at(2) << 8 | value.at(1);

Creating the temporary vector, passing it by value and then destroying both copies means two constructor and two destructor calls which are wholly unnecessary and only slows down the code.

About type conversions: Note that the arithmetic operators (including << and |) don't operate on integral values smaller than int. What happens is automatic type conversion to either int or unsigned so no explicit cast is necessary.

Prefer `constexpr` to old-style `#define`

Rather than using a #define for MAXTAGLENGTH the code could use a constexpr:

constexpr std::size_t MAXTAGLENGTH = 20000;

It doesn't make a lot of difference here, but generally the advantage is that the value has an associated type.

Eliminate unused variables

The valueSize variable in Deserialize is not used. That's quite strange considering the number of lines of code dedicated to extracting it.

Question 3

Yeah with numeric values I know the valueSize and that's why, but no need to parse it then. PS Polymorphism advice I find unnecessary really. Thanks. I will look at these points once again later

Question 4

"(value.at(2) << 8)": is it OK to shift a uint8_t by 8 bits? Also, how do I solve the namespace issue then?

Question 5

Yes, I've amended my answer to talk about type conversion. I'm not sure what you mean when you ask about solving a "namespace issue."

Question 6

I don't think we should drop const while using constexpr. They actually have different semantics. constexpr const std::size_t MAXTAGLENGTH = 20000;

Question 7

@AntiMoron: no, constexpr implies const in this context. There is no reason to have both here.

Edward · 67.2k reputation · 4 gold badges · 120 silver badges · 284 bronze badges · Answer 1 · 2015-12-30 15:03:30Z

I see a number of things that may help you improve your code.

Don't abuse `using namespace std`

Putting using namespace std at the top of every program is a bad habit that you'd do well to avoid. It is particularly bad to put it into a header file, so please don't do that. Instead, simply specify the namespace where you use it, so cout becomes std::cout, endl becomes std::endl and so forth.

Use all required `#include`s

The code uses std::string and appears to use std::exception but doesn't include the corresponding headers. The code should have:

#include <string>
#include <stdexcept>

And not

#include <string.h>

While the code may compile as you have it, it's solely due to the implementation details of the particular compiler and library you are using. It can't be relied on and can (and does) change from version to version of even a single compiler on a single platform. You can use a reference such as http://en.cppreference.com to make sure you know which standard header to include for each standard function.

Use polymorphism

The current design has each TLVObject having one of each possible interpretation embedded within it. A more C++-style design would be to have a single base class and then derive the particular types from that base class. So for example, you might still have TLVObject as the base class, but then derive TLVByte, TLVWord, etc. from that. Not only would that make the interface cleaner, but it would simplify adding new types.

Consider ASN.1

There is a standard called ASN.1 that already describes ways of encoding many of these types. Instead of re-inventing your own tags, you may find it useful to use the ones defined by ASN.1.

Use const where practical

Methods that do not modify the underlying object should be declared const, for example: TLV_TYPE GetType() const { return m_tlvType; }

Pass by const reference

Several places in the code, such as TLVObject::LEToUINT64(vector<uint8_t> value) take their parameters by value. Instead, they could likely be made more efficient by passing a const reference, as in:

TLVObject::LEToUINT64(const vector<uint8_t> &value)

This avoids copying into a new vector and signals the compiler that the passed value will not be altered.

Simplify the code

There are some overly complex pieces of code in this. For example, we have this:

// Get tag name size.
vector<uint8_t> tmp;
tmp.push_back(value.at(1));
tmp.push_back(value.at(2));
uint16_t tagLength = LEToUINT16(tmp);

That would be shorter, faster, and more portable if written like this instead:

uint16_t tagLength = value.at(2) << 8 | value.at(1);

Creating the temporary vector, passing it by value and then destroying both copies means two constructor and two destructor calls which are wholly unnecessary and only slows down the code.

About type conversions: Note that the arithmetic operators (including << and |) don't operate on integral values smaller than int. What happens is automatic type conversion to either int or unsigned so no explicit cast is necessary.

Prefer `constexpr` to old-style `#define`

Rather than using a #define for MAXTAGLENGTH the code could use a constexpr:

constexpr std::size_t MAXTAGLENGTH = 20000;

It doesn't make a lot of difference here, but generally the advantage is that the value has an associated type.

Eliminate unused variables

The valueSize variable in Deserialize is not used. That's quite strange considering the number of lines of code dedicated to extracting it.

Yeah with numeric values I know the valueSize and that's why, but no need to parse it then. PS Polymorphism advice I find unnecessary really. Thanks. I will look at these points once again later
"(value.at(2) << 8)" is it ok to shift uint8_t with 8 bits?. Also how to solve namespace issue then?
Yes, I've amended my answer to talk about type conversion. I'm not sure what you mean when you ask about solving a "namespace issue."
I don't think we should drop const while using constexpr. They actually have different semantics. constexpr const std::size_t MAXTAGLENGTH = 20000;
@AntiMoron: no, constexpr implies const in this context. There is no reason to have both here.

Stack Exchange Network

TLV implementation in C++

1 Answer 1

Don't abuse `using namespace std`

Use all required `#include`s

Use polymorphism

Consider ASN.1

Use const where practical

Pass by const reference

Simplify the code

Prefer `constexpr` to old-style `#define`

Eliminate unused variables

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Hot Network Questions

TLV implementation in C++

1 Answer 1

Don't abuse using namespace std

Use all required #includes

Use polymorphism

Consider ASN.1

Use const where practical

Pass by const reference

Simplify the code

Prefer constexpr to old-style #define

Eliminate unused variables

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Related

Hot Network Questions

Don't abuse `using namespace std`

Use all required `#include`s

Prefer `constexpr` to old-style `#define`