I tried implementing TLV (https://en.wikipedia.org/wiki/Type-length-value). My implementation actually goes beyond plain TLV, because every TLV object also carries a "tag" element that you can use to name it. My goal is to serialize an array of TLV objects to a byte array and back (I haven't implemented this part yet). For now I have written a class that handles a single TLV object and its serialization/deserialization. If this class is OK, I believe it will be much easier to extend it to arrays. Here is the implementation. Feel free to comment.
Btw:
- the class assumes little endian encoding for integers
- and no unicode strings are supported
PS. So basically a single TLV object is a data structure that holds a value (a numeric type, a string, or a byte array), a type field indicating which kind of data the value is, and a tag name indicating what the value represents, e.g. "phone number".
.cpp file:
#include "stdafx.h"
#include "TLVObject.h"
// Default constructor: delegates to clearMembers(), which marks the object as
// UNDEFINED and resets the tag name, the numeric values and the string value.
TLVObject::TLVObject()
{
    clearMembers();
}
// Destructor: nothing to release by hand; the string and vector members
// clean up after themselves.
TLVObject::~TLVObject() {}
//////////////////////////////////////
// Integers to byte arrays
vector<uint8_t> TLVObject::UINT16ToByteArrayLE(uint16_t paramInt)
{
vector<uint8_t> arrayOfByte(2);
for (int i = 0; i < 2; i++)
arrayOfByte[i] = ((paramInt >> (i * 8)) & 0xFF);
return arrayOfByte;
}
vector<uint8_t> TLVObject::UINT32ToByteArrayLE(uint32_t paramInt)
{
vector<uint8_t> arrayOfByte(4);
for (int i = 0; i < 4; i++)
arrayOfByte[i] = ((paramInt >> (i * 8)) & 0xFF);
return arrayOfByte;
}
vector<uint8_t> TLVObject::UINT64ToByteArrayLE(uint64_t paramInt)
{
vector<uint8_t> arrayOfByte(8);
for (int i = 0; i < 8; i++)
arrayOfByte[i] = ((paramInt >> (i * 8)) & 0xFF);
return arrayOfByte;
}
/////////////////////////////////////////////////
// Byte array to integers
// Decodes a 2-byte little-endian buffer into a 16-bit unsigned integer.
//
// value: exactly two bytes, least significant byte first.
// Returns the decoded integer.
// Throws std::runtime_error (a std::exception) if value.size() != 2. The
// standard std::exception has no constructor taking a message; that overload
// is a Microsoft extension, so the portable std::runtime_error is used.
uint16_t TLVObject::LEToUINT16(std::vector<uint8_t> value)
{
    if (value.size() != 2)
        throw std::runtime_error("LEToUINT16: Wrong size");

    const uint16_t low  = value[0];
    const uint16_t high = value[1];
    return static_cast<uint16_t>((high << 8) | low);
}
// Decodes a 4-byte little-endian buffer into a 32-bit unsigned integer.
//
// value: exactly four bytes, least significant byte first.
// Returns the decoded integer.
// Throws std::runtime_error (a std::exception) if value.size() != 4. The
// standard std::exception has no constructor taking a message; that overload
// is a Microsoft extension, so the portable std::runtime_error is used.
uint32_t TLVObject::LEToUINT32(std::vector<uint8_t> value)
{
    if (value.size() != 4)
        throw std::runtime_error("LEToUINT32: Wrong size");

    uint32_t result = 0;
    for (std::vector<uint8_t>::size_type i = 0; i < 4; ++i)
    {
        // Widen before shifting so the byte lands in a 32-bit unsigned value.
        result |= static_cast<uint32_t>(value[i]) << (8 * i);
    }
    return result;
}
// Decodes an 8-byte little-endian buffer into a 64-bit unsigned integer.
//
// value: exactly eight bytes, least significant byte first.
// Returns the decoded integer.
// Throws std::runtime_error (a std::exception) if value.size() != 8. The
// standard std::exception has no constructor taking a message; that overload
// is a Microsoft extension, so the portable std::runtime_error is used.
uint64_t TLVObject::LEToUINT64(std::vector<uint8_t> value)
{
    if (value.size() != 8)
        throw std::runtime_error("LEToUINT64: Wrong size");

    uint64_t result = 0;
    for (std::vector<uint8_t>::size_type i = 0; i < 8; ++i)
    {
        // Widen to 64 bits first: shifting a promoted int by 32 or more
        // would be undefined.
        result |= static_cast<uint64_t>(value[i]) << (8 * i);
    }
    return result;
}
///////////////////////////////////////////////
// Serialization and Deserialization methods
vector<uint8_t> TLVObject::Serialize()
{
vector<uint8_t> tmp;
// Serialize type first
tmp.push_back((uint8_t)m_tlvType);
// Serialize tag name length (which is 2 bytes max) as little endian byte array
uint16_t tagLen = (uint16_t)m_tagName.length();
tmp.push_back(UINT16ToByteArrayLE(tagLen).at(0));
tmp.push_back(UINT16ToByteArrayLE(tagLen).at(1));
// Now serialize tag name itself
for(int i = 0; i < tagLen; i++)
{
tmp.push_back((uint8_t)m_tagName.at(i));
}
// Data size 4 byte integer.
uint32_t dataSize = 0;
if(m_tlvType == BLOB)
{
dataSize = m_blob.size();
// Serialize size
vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
tmp.push_back(tmp1.at(0));
tmp.push_back(tmp1.at(1));
tmp.push_back(tmp1.at(2));
tmp.push_back(tmp1.at(3));
// Now, serialize the blob.
for(uint32_t i = 0; i<dataSize; i++)
tmp.push_back(m_blob[i]);
}else if(m_tlvType == STRING)
{
dataSize = m_strValue.length();
// Serialize size
vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
tmp.push_back(tmp1.at(0));
tmp.push_back(tmp1.at(1));
tmp.push_back(tmp1.at(2));
tmp.push_back(tmp1.at(3));
// Now, serialize the string.
for(uint32_t i = 0; i<dataSize; i++)
tmp.push_back(m_strValue[i]);
}else if(m_tlvType == BYTE)
{
dataSize = 1;
// Serialize size
vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
tmp.push_back(tmp1.at(0));
tmp.push_back(tmp1.at(1));
tmp.push_back(tmp1.at(2));
tmp.push_back(tmp1.at(3));
// Serialize the byte.
tmp.push_back(m_BYTE);
}else if(m_tlvType == WORD)
{
dataSize = 2;
// Serialize size
vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
tmp.push_back(tmp1.at(0));
tmp.push_back(tmp1.at(1));
tmp.push_back(tmp1.at(2));
tmp.push_back(tmp1.at(3));
// Now, serialize the WORD value.
tmp.push_back(UINT16ToByteArrayLE(m_WORD).at(0));
tmp.push_back(UINT16ToByteArrayLE(m_WORD).at(1));
}else if(m_tlvType == DWORD)
{
dataSize = 4;
// Serialize size
vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
tmp.push_back(tmp1.at(0));
tmp.push_back(tmp1.at(1));
tmp.push_back(tmp1.at(2));
tmp.push_back(tmp1.at(3));
// Now, serialize the unsigned integer.
vector<uint8_t> tmp2 = UINT32ToByteArrayLE(m_DWORD);
tmp.push_back(tmp2.at(0));
tmp.push_back(tmp2.at(1));
tmp.push_back(tmp2.at(2));
tmp.push_back(tmp2.at(3));
}else if(m_tlvType == QWORD)
{
dataSize = 8;
// Serialize size
vector<uint8_t> tmp1 = UINT32ToByteArrayLE(dataSize);
tmp.push_back(tmp1.at(0));
tmp.push_back(tmp1.at(1));
tmp.push_back(tmp1.at(2));
tmp.push_back(tmp1.at(3));
// Serialize data
vector<uint8_t> tmp2 = UINT64ToByteArrayLE(m_QWORD);
tmp.push_back(tmp2.at(0));
tmp.push_back(tmp2.at(1));
tmp.push_back(tmp2.at(2));
tmp.push_back(tmp2.at(3));
tmp.push_back(tmp2.at(4));
tmp.push_back(tmp2.at(5));
tmp.push_back(tmp2.at(6));
tmp.push_back(tmp2.at(7));
}
return tmp;
}
void TLVObject::Deserialize(vector<uint8_t> value)
{
// Get type
TLV_TYPE tlvType = static_cast<TLV_TYPE>(value.at(0));
// Get tag name size.
vector<uint8_t> tmp;
tmp.push_back(value.at(1));
tmp.push_back(value.at(2));
uint16_t tagLength = LEToUINT16(tmp);
if(tagLength > MAXTAGLENGTH)
throw exception("Tag name length exceeded");
// Now get tag name
std::string tagName;
for(int i = 0; i<tagLength; i++)
tagName += value.at(3 + i); // Use an offset.
if(tlvType == STRING)
{
// Get value size
vector<uint8_t> tmp;
tmp.push_back(value.at(3 + tagLength + 0));
tmp.push_back(value.at(3 + tagLength + 1));
tmp.push_back(value.at(3 + tagLength + 2));
tmp.push_back(value.at(3 + tagLength + 3));
uint32_t valueSize = LEToUINT32(tmp);
clearMembers();
// Get value
m_strValue="";
for(uint32_t i = 0; i<valueSize; i++)
m_strValue += value.at(3 + tagLength + 4 + i);
// Assign other values too if we got this far.
m_tlvType = tlvType;
m_tagName = tagName;
}else if (tlvType == BLOB)
{
// Get value size
vector<uint8_t> tmp;
tmp.push_back(value.at(3 + tagLength + 0));
tmp.push_back(value.at(3 + tagLength + 1));
tmp.push_back(value.at(3 + tagLength + 2));
tmp.push_back(value.at(3 + tagLength + 3));
uint32_t valueSize = LEToUINT32(tmp);
clearMembers();
// Get value
m_blob.clear();
for(uint32_t i = 0; i<valueSize; i++)
m_blob.push_back(value.at(3 + tagLength + 4 + i));
// Assign other values too if we got this far.
m_tlvType = tlvType;
m_tagName = tagName;
}else if (tlvType == BYTE)
{
// Get value size
vector<uint8_t> tmp;
tmp.push_back(value.at(3 + tagLength + 0));
tmp.push_back(value.at(3 + tagLength + 1));
tmp.push_back(value.at(3 + tagLength + 2));
tmp.push_back(value.at(3 + tagLength + 3));
uint32_t valueSize = LEToUINT32(tmp);
clearMembers();
// Get value
m_BYTE = value.at(3 + tagLength + 4);
// Assign other values too if we got this far.
m_tlvType = tlvType;
m_tagName = tagName;
} else if (tlvType == WORD)
{
// Get value size
vector<uint8_t> tmp;
tmp.push_back(value.at(3 + tagLength + 0));
tmp.push_back(value.at(3 + tagLength + 1));
tmp.push_back(value.at(3 + tagLength + 2));
tmp.push_back(value.at(3 + tagLength + 3));
uint32_t valueSize = LEToUINT32(tmp);
// Get byte array represenginf the WORD
vector<uint8_t> tmp2;
for(int i = 0; i<2; i++)
tmp2.push_back(value.at(3 + tagLength + 4 + i));
clearMembers();
// Store word
m_WORD = LEToUINT16(tmp2);
// Assign other values too if we got this far.
m_tlvType = tlvType;
m_tagName = tagName;
}else if (tlvType == DWORD)
{
// Get value size
vector<uint8_t> tmp;
tmp.push_back(value.at(3 + tagLength + 0));
tmp.push_back(value.at(3 + tagLength + 1));
tmp.push_back(value.at(3 + tagLength + 2));
tmp.push_back(value.at(3 + tagLength + 3));
uint32_t valueSize = LEToUINT32(tmp);
// Get byte array represenginf the WORD
vector<uint8_t> tmp2;
for(int i = 0; i<4; i++)
tmp2.push_back(value.at(3 + tagLength + 4 + i));
clearMembers();
// Store work
m_DWORD = LEToUINT32(tmp2);
// Assign other values too if we got this far.
m_tlvType = tlvType;
m_tagName = tagName;
}else if (tlvType == QWORD)
{
// Get value size
vector<uint8_t> tmp;
tmp.push_back(value.at(3 + tagLength + 0));
tmp.push_back(value.at(3 + tagLength + 1));
tmp.push_back(value.at(3 + tagLength + 2));
tmp.push_back(value.at(3 + tagLength + 3));
uint32_t valueSize = LEToUINT32(tmp);
// Get byte array represenginf the WORD
vector<uint8_t> tmp2;
for(int i = 0; i<8; i++)
tmp2.push_back(value.at(3 + tagLength + 4 + i));
clearMembers();
// Store work
m_QWORD = LEToUINT64(tmp2);
// Assign other values too if we got this far.
m_tlvType = tlvType;
m_tagName = tagName;
}
}
header:
#pragma once
#include <string.h>

#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>
// Upper bound on the tag name length. Longer tags are rejected with an
// exception by the TLVObject value constructors and by Deserialize().
#define MAXTAGLENGTH 20000
// NOTE(review): a using-directive in a header leaks into every file that
// includes it. The declarations below rely on it for unqualified
// vector/string, so it cannot be removed in isolation.
using namespace std;
// Kind of value stored in a TLVObject. Serialize() writes the numeric code as
// the first byte of the encoded form, so the values must stay stable.
enum TLV_TYPE
{
    UNDEFINED = 0, // no value assigned
    BYTE      = 1, // 8-bit unsigned integer
    WORD      = 2, // 16-bit unsigned integer
    DWORD     = 3, // 32-bit unsigned integer
    QWORD     = 4, // 64-bit unsigned integer
    STRING    = 5, // character string
    BLOB      = 6  // raw byte array
};
class TLVObject
{
private:
TLV_TYPE m_tlvType; // TLV type
std::string m_tagName; // TLV tag name; tag name max length is MAXTAGLENGTH
// Possible TLV values:
// TLV data size is 4 byte unsigned integer max.
uint8_t m_BYTE;
uint16_t m_WORD;
uint32_t m_DWORD;
uint64_t m_QWORD;
string m_strValue;
vector<uint8_t> m_blob;
void clearMembers()
{
m_tlvType = UNDEFINED;
m_tagName = "";
m_BYTE = 0;
m_WORD = 0;
m_DWORD = 0;
m_QWORD = 0;
m_strValue = "";
}
// Helpers
// These work exclusively assuming little endian encoding
vector<uint8_t> UINT16ToByteArrayLE(uint16_t paramInt);
vector<uint8_t> UINT32ToByteArrayLE(uint32_t paramInt);
vector<uint8_t> UINT64ToByteArrayLE(uint64_t paramInt);
uint16_t LEToUINT16(vector<uint8_t> value);
uint32_t LEToUINT32(vector<uint8_t> value);
uint64_t LEToUINT64(vector<uint8_t> value);
public:
vector<uint8_t> Serialize();
void Deserialize(vector<uint8_t> value);
TLVObject(string tag, vector<uint8_t> blob)
{
if(tag.length() > MAXTAGLENGTH)
throw exception("Tag name length exceeded");
m_tlvType = BLOB;
m_tagName = tag;
m_blob = blob;
}
TLVObject(string tag, uint8_t value)
{
if(tag.length() > MAXTAGLENGTH)
throw exception("Tag name length exceeded");
m_tlvType = BYTE;
m_tagName = tag;
m_BYTE = value;
}
TLVObject(string tag, uint16_t value)
{
if(tag.length() > MAXTAGLENGTH)
throw exception("Tag name length exceeded");
m_tlvType = WORD;
m_tagName = tag;
m_WORD = value;
}
TLVObject(string tag, uint32_t value)
{
if(tag.length() > MAXTAGLENGTH)
throw exception("Tag name length exceeded");
m_tlvType = DWORD;
m_tagName = tag;
m_DWORD = value;
}
TLVObject(string tag, uint64_t value)
{
if(tag.length() > MAXTAGLENGTH)
throw exception("Tag name length exceeded");
m_tlvType = QWORD;
m_tagName = tag;
m_QWORD = value;
}
TLVObject(string tag, string value)
{
if(tag.length() > MAXTAGLENGTH)
throw exception("Tag name length exceeded");
m_tlvType = STRING;
m_tagName = tag;
m_strValue = value;
}
string GetStringValue()
{
if(m_tlvType != STRING)
throw exception("This is not string TLV");
return m_strValue;
}
vector<uint8_t> GetBlobValue()
{
if(m_tlvType != BLOB)
throw exception("This is not BLOB TLV");
return m_blob;
}
uint8_t GetBYTEValue()
{
if(m_tlvType != BYTE)
throw exception("This is not BYTE TLV");
return m_BYTE;
}
uint16_t GetUINT16Value()
{
if(m_tlvType != WORD)
throw exception("This is not WORD TLV");
return m_WORD;
}
uint32_t GetUINT32Value()
{
if(m_tlvType != DWORD)
throw exception("This is not DWORD TLV");
return m_DWORD;
}
uint64_t GetUINT64Value()
{
if(m_tlvType != QWORD)
throw exception("This is not QWORD TLV");
return m_QWORD;
}
TLV_TYPE GetType()
{
return m_tlvType;
}
TLVObject(void);
~TLVObject(void);
};
Test program for single TLV
TLVObject obj("Name", (uint16_t)255);
vector<uint8_t> tmp = obj.Serialize(); // Serialize as byte array
TLVObject obj1;
obj1.Deserialize(tmp); // Reconstruct TLV from byte array
1 Answer 1
I see a number of things that may help you improve your code.
Don't abuse using namespace std
Putting `using namespace std` at the top of every program is a bad habit that you'd do well to avoid. It is particularly bad to put it into a header file, so please don't do that. Instead, simply specify the namespace where you use it, so `cout` becomes `std::cout`, `endl` becomes `std::endl`, and so forth.
Use all required `#include`s
The code uses `std::string` and appears to use `std::exception`, but doesn't include the corresponding headers. The code should have:
#include <string>
#include <stdexcept>
And not
#include <string.h>
While the code may compile as you have it, it's solely due to the implementation details of the particular compiler and library you are using. It can't be relied on and can (and does) change from version to version of even a single compiler on a single platform. You can use a reference such as http://en.cppreference.com to make sure you know which standard header to include for each standard function.
Use polymorphism
The current design has each `TLVObject` carrying one member for every possible interpretation of its value. A more C++-style design would be to have a single base class and then derive the particular types from that base class. So for example, you might still have `TLVObject` as the base class, but then derive `TLVByte`, `TLVWord`, etc. from it. Not only would that make the interface cleaner, but it would also simplify adding new types.
Consider ASN.1
There is a standard called ASN.1 that already describes ways of encoding many of these types. Instead of re-inventing your own tags, you may find it useful to use the ones defined by ASN.1.
Use const where practical
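Methods that do not modify the underlying object should be declared `const`. For example, `GetType()` only reads `m_tlvType`, so it can be declared as `TLV_TYPE GetType() const`.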
Pass by const reference
Several places in the code, such as `TLVObject::LEToUINT64(vector<uint8_t> value)`, take their parameters by value. They could likely be made more efficient by taking a `const` reference instead, as in:
TLVObject::LEToUINT64(const vector<uint8_t> &value)
This avoids copying into a new vector and signals the compiler that the passed value will not be altered.
Simplify the code
There are some overly complex pieces of code in this. For example, we have this:
// Get tag name size.
vector<uint8_t> tmp;
tmp.push_back(value.at(1));
tmp.push_back(value.at(2));
uint16_t tagLength = LEToUINT16(tmp);
That would be shorter, faster, and more portable if written like this instead:
uint16_t tagLength = value.at(2) << 8 | value.at(1);
Creating the temporary vector, passing it by value and then destroying both copies means two constructor and two destructor calls which are wholly unnecessary and only slows down the code.
About type conversions: note that the arithmetic operators (including `<<` and `|`) don't operate on integral values smaller than `int`. Such operands are first converted automatically to either `int` or `unsigned`, so no explicit cast is necessary.
Prefer `constexpr` to old-style `#define`
Rather than using a `#define` for `MAXTAGLENGTH`, the code could use a `constexpr`:
constexpr std::size_t MAXTAGLENGTH = 20000;
It doesn't make a lot of difference here, but generally the advantage is that the value has an associated type.
Eliminate unused variables
The `valueSize` variable in `Deserialize` is extracted for the fixed-size types but never used. That's quite strange considering the number of lines of code dedicated to extracting it.
-
Yeah, with numeric values I already know the valueSize, which is why it is unused; but then there is no need to parse it. PS: I find the polymorphism advice unnecessary, really. Thanks. I will look at these points again later. – user93510, Dec 30, 2015 at 18:05
-
Is it OK to shift a uint8_t by 8 bits, as in `(value.at(2) << 8)`? Also, how should I solve the namespace issue then? – user93510, Dec 30, 2015 at 19:59
-
Yes, I've amended my answer to talk about type conversion. I'm not sure what you mean when you ask about solving a "namespace issue." – Edward, Dec 30, 2015 at 20:16
-
I don't think we should drop `const` while using `constexpr`. They actually have different semantics: `constexpr const std::size_t MAXTAGLENGTH = 20000;` – AntiMoron, Dec 31, 2015 at 2:20
-
@AntiMoron: no, `constexpr` implies `const` in this context. There is no reason to have both here. – Edward, Dec 31, 2015 at 2:59