Binary serialization library

Question 1

I am currently working on a binary serialization library written in C++11 for a personal project. I'd really like to have a review about my design, my implementation and everything else. The library is inspired from QDataStream.

Here is the GitLab repository containing the project.

Sery/Misc.hh

#ifndef SERY_MISC_HH_
#define SERY_MISC_HH_
#define SERY_BEGIN_NAMESPACE namespace Sery {
#define SERY_END_NAMESPACE }
#include <cstdint>
#include <type_traits>
SERY_BEGIN_NAMESPACE
template< bool B, class T = void >
using enable_if_t = typename ::std::enable_if<B, T>::type;
enum Endian
{
 LittleEndian,
 BigEndian
};
typedef std::int8_t int8;
typedef std::int16_t int16;
typedef std::int32_t int32;
typedef std::int64_t int64;
typedef std::uint8_t uint8;
typedef std::uint16_t uint16;
typedef std::uint32_t uint32;
typedef std::uint64_t uint64;
namespace detail
{
 inline Endian getSoftwareEndian()
 {
 int16 witness = 0x5501;
 int8 test = *((int8*)&witness);
 return (test == 1 ? Endian::LittleEndian : Endian::BigEndian);
 }
}
SERY_END_NAMESPACE
#endif // SERY_MISC_HH_

Sery/Buffer.hh

#ifndef SERY_BUFFER_HH_
#define SERY_BUFFER_HH_
#include <vector>
#include <Sery/IBuffer.hh>
SERY_BEGIN_NAMESPACE
class Buffer : public IBuffer
{
public:
 Buffer();
 Buffer(const char *buffer, uint32 size);
 ~Buffer();
public:
 virtual void writeRaw(const char *buffer, uint32 size) final;
 virtual void readRaw(char *buffer, uint32 size) final;
 virtual uint32 size() const final;
 virtual const char *data() const final;
private:
 std::vector<char> _buffer;
};
SERY_END_NAMESPACE
#endif // SERY_BUFFER_HH_

Sery/Buffer.cpp

#include <Sery/Buffer.hh>
#include <sstream>
#include <iomanip>
#include <string.h>
SERY_BEGIN_NAMESPACE
Buffer::Buffer()
 : _buffer()
{
}
Buffer::Buffer(const char *buffer, uint32 size)
 : _buffer(buffer, buffer + size)
{
}
Buffer::~Buffer()
{
}
void Buffer::writeRaw(const char *buffer, uint32 size)
{
 _buffer.insert(_buffer.end(), buffer, buffer + size);
}
void Buffer::readRaw(char *buffer, uint32 size)
{
 memcpy(buffer, _buffer.data(), size);
 _buffer.erase(_buffer.begin(), _buffer.begin() + size);
}
uint32 Buffer::size() const
{
 return ((uint32)_buffer.size());
}
const char *Buffer::data() const
{
 return (_buffer.data());
}
SERY_END_NAMESPACE

Sery/Stream.hh

#ifndef SERY_STREAM_HH_
#define SERY_STREAM_HH_
#include <Sery/Misc.hh>
SERY_BEGIN_NAMESPACE
class IBuffer;
class Stream
{
private:
 // Removing copy and move functions
 Stream(const Stream&) = delete;
 Stream(Stream&&) = delete;
 Stream &operator=(const Stream&) = delete;
 Stream &operator=(Stream&&) = delete;
private:
 static Endian globalEndian;
public:
 static void setGlobalEndian(Endian endian);
 static Endian getGlobalEndian();
public:
 // Must be constructed with an IBuffer
 Stream(IBuffer &buffer);
 ~Stream();
public:
 // Proxies for lower level manipulation
 Stream &writeRaw(const char *buffer, uint32 size);
 Stream &readRaw(char *buffer, uint32 size);
public:
 void setLocalEndian(Endian endian);
 Endian getLocalEndian() const;
private:
 IBuffer &_buffer;
 Endian _localEndian;
};
// (De)Serialization of a C-Style string
Stream &operator<<(Stream &stream, const char *str);
Stream &operator>>(Stream &stream, char *&str);
// (De)Serialization of all arithmetic types
template <class T, enable_if_t<std::is_arithmetic<T>::value> * = nullptr>
Stream &operator<<(Stream &stream, T t);
template <class T, enable_if_t<std::is_arithmetic<T>::value> * = nullptr>
Stream &operator>>(Stream &stream, T &t);
SERY_END_NAMESPACE
#include "detail/Stream_STD.hh" // Contains serialization for standard library
#endif // SERY_STREAM_HH_

Sery/Stream.cpp

#include <Sery/Stream.hh>
#include <Sery/IBuffer.hh>
#include <cstring>
#include <iostream>
SERY_BEGIN_NAMESPACE
Stream::Stream(IBuffer &buffer)
: _buffer(buffer),
_localEndian(Stream::globalEndian)
{
}
Stream::~Stream()
{
}
Stream &Stream::writeRaw(const char *buffer, uint32 size)
{
 _buffer.writeRaw(buffer, size);
 return (*this);
}
Stream &Stream::readRaw(char *buffer, uint32 size)
{
 _buffer.readRaw(buffer, size);
 return (*this);
}
void Stream::setLocalEndian(Endian endian)
{
 _localEndian = endian;
}
Endian Stream::getLocalEndian() const
{
 return (_localEndian);
}
// Static members
Endian Stream::globalEndian = Endian::BigEndian;
void Stream::setGlobalEndian(Endian endian)
{
 globalEndian = endian;
}
Endian Stream::getGlobalEndian()
{
 return (globalEndian);
}
//////////////////////////////////////////
// External functions for serialization //
//////////////////////////////////////////
// Templates for serializing arithmetics types
template <class T, enable_if_t<std::is_arithmetic<T>::value> *>
Stream &operator<<(Stream &stream, T t)
{
 Endian softwareEndian = detail::getSoftwareEndian();
 Endian currentEndian = stream.getLocalEndian();
 char buffer[sizeof(T)];
 const uint8 *p = reinterpret_cast<const uint8 *>(&t);
 for (size_t index = 0;
 index < sizeof(T);
 ++index)
 {
 if (currentEndian == softwareEndian)
 buffer[index] = *p++;
 else
 buffer[sizeof(T) - index - 1] = *p++;
 }
 stream.writeRaw(buffer, sizeof(T));
 return (stream);
}
// Explicit instantiations of templates functions
template Stream &operator<< <int8> (Stream &, int8);
template Stream &operator<< <int16> (Stream &, int16);
template Stream &operator<< <int32> (Stream &, int32);
template Stream &operator<< <int64> (Stream &, int64);
template Stream &operator<< <uint8> (Stream &, uint8);
template Stream &operator<< <uint16> (Stream &, uint16);
template Stream &operator<< <uint32> (Stream &, uint32);
template Stream &operator<< <uint64> (Stream &, uint64);
template Stream &operator<< <float> (Stream &, float);
template Stream &operator<< <double> (Stream &, double);
template Stream &operator<< <long double> (Stream &, long double);
// Templates for deserializing arithmetics types
template <class T, enable_if_t<std::is_arithmetic<T>::value> *>
Stream &operator>>(Stream &stream, T &t)
{
 Endian softwareEndian = detail::getSoftwareEndian();
 Endian currentEndian = stream.getLocalEndian();
 char buffer[sizeof(T)];
 stream.readRaw(buffer, sizeof(T));
 uint8 *p = reinterpret_cast<uint8 *>(&t);
 for (size_t index = 0;
 index < sizeof(T);
 ++index)
 {
 if (currentEndian == softwareEndian)
 *p++ = buffer[index];
 else
 *p++ = buffer[sizeof(T) - index - 1];
 }
 return (stream);
}
// Explicit instantiations of templates functions
template Stream &operator>> <int8> (Stream &, int8 &);
template Stream &operator>> <int16> (Stream &, int16 &);
template Stream &operator>> <int32> (Stream &, int32 &);
template Stream &operator>> <int64> (Stream &, int64 &);
template Stream &operator>> <uint8> (Stream &, uint8 &);
template Stream &operator>> <uint16> (Stream &, uint16 &);
template Stream &operator>> <uint32> (Stream &, uint32 &);
template Stream &operator>> <uint64> (Stream &, uint64 &);
template Stream &operator>> <float> (Stream &, float &);
template Stream &operator>> <double> (Stream &, double &);
template Stream &operator>> <long double> (Stream &, long double &);
// (De)Serialization of C-Style strings
Stream &operator<<(Stream &stream, const char *str)
{
 uint32 len = (uint32)std::strlen(str) + 1;
 stream << len;
 stream.writeRaw(str, len);
 return (stream);
}
Stream &operator>>(Stream &stream, char *&str)
{
 uint32 len = 0;
 stream >> len;
 char *buffer = new char[len];
 stream.readRaw(buffer, len);
 str = buffer;
 return (stream);
}
SERY_END_NAMESPACE

Question 2

You should use network byte order to deal with endianess. See htonl() and family. In addition to Endianess usally floating point values are not stored in IEEE-754 Interchange formats internally. They are usually IEEE-754 but one of the "Extended precision formats" and you can not translate directly to other platforms binary formats. You should convert to one of the standard Interchange formats before writing to your stream.

Question 3

Example: On my mac long double is 128 bits but only 96 bits are used (because that is the size of the the FP register. On the virtual unix machine I run on my mac. The `long double is 128 bits. But only 80 bits are used. See: github.com/Loki-Astari/ThorsSerializer/blob/master/src/…

Question 4

@Morwenn I have updated the question. The first file is now Sery/Misc.hh, defining some utils for Sery.

Question 5

@LokiAstari Thanks for your feedback. About the endianness, I wanted the user to be able to change it himself. The network's endian is supposed to be Big, isn't it ? Since I thought so, I made Sery::globalEndian being BigEndian by default.

Question 6

That's fine. :-) Just check your platform is using IEEE-754 std::numeric_limits<T>::is_iec559. Then check the size of your float with sizeof() then check and the number of digits. std::numeric_limits<T>::digits. With the size and number of digits you can check if is an IEEE-754 interchange format. If it is then you don't need to do anything. If it is not then unless you really want the support just throw an error. This will wolve 99.99% of situations.

Question 7

A few notes:

stop using C-style casts; Better alternative exists
stop wraping return expressions in parenthesis (every time I see that I think "this is a C-style value cast ... no wait!")
do not generate code using macros; the namespace macros should only be used if you are going to build on a platform that has no namespace support (are you? you didn't specify)
your setter and getter for GlobalEndian only set and get the value;

Better solution: consider setting the value itself public and removing the accessors (they don't add anything and effectively they expose the value as if it were public).

Even better solution: Ensure that each class has it's own (private) copy of the endianness flag and initialize the private copy upon construction (with a default value); Instances should have no public getter or setter. This is because you cannot change the endianness in the middle of writing to the stream and still expect the written data to be valid.

The way you arrange the code by columns makes it (marginally) easier to read, but over the lifetime of a project, you will either get some tokens out of sync with each other, or have to realign entire lists of functions when you change the length of an identifier; it is debatable if the extra ease in reading the code is worth it - because once you get used to reading untabulated code, the extra alignment adds nothing but extra maintenance effort).

Question 8

Thanks for you comment, I really appreciate it! I'll answer in different comments to keep things clear. Firstly : about the C-style casts, you're totally right, I should avoid them. But since static_cast and reinterpret_cast are so long to write, I think about making proxies functions like s_c and r_c. But it really ruins the readability.

Question 9

About return expressions wrapped in parenthesis, I am really used to it. Do you really think it is that bad? I think it really depends on what people are used to.

Question 10

The casts are long by design. They are supposed to be large and explicit in code, making them easy to see. Ideally, you should write code that doesn't need them at all; when you use them in a lot of places, that is usually a sign that your architecture and APIs should be improved; As such, usually when you have casts, the solution is to refactor, not to alias them to something smaller (because that doesn't actually solve the problem).

Question 11

Code generation using macro is quite useful when it comes to MetaProgramming. However, you are right, there is no point in wrapping the namespace declaration inside a macro like I did there. It simply prevented my IDE from indenting inside the namespace.

Question 12

The purpose of the parenthesis is to allow you to write #define return (x)do_some_operation(x); return x (or something similar). It is a remnant of old C-style code, from an era when templates and classes didn't exist. In modern code they add nothing (but cruft) because RAII makes such code obsolete.

Question 13

Another few notes:

You can use #pragma once instead of include guards. This is a non-standard but widely supported preprocessor directive. Most of the modern compilers support it.
IMHO, namespace Sery { } is much more clearer than SERY_BEGIN_NAMESPACE / SERY_END_NAMESPACE. Also consider writing } // namespace Sery at the bottom of the file for clarity.
Consider marking Buffer class as final. Otherwise your Buffer::~Buffer() should have been either either public and virtual, or protected and non-virtual (Sutter's rule). The same for Stream class.
I also agree with @utnapistim about code formatting.
The leading underscores for private members do not break any rules, but it is close to it (e.g. if you start an identifier with a capital letter). Consider using trailing underscore for that matter. You'll get used to it soon.

Question 14

I don't really like pragma once because it is non-standard. I try to avoid non-standard things as much as possible.

Question 15

Yeah, I will remove the #define SERY_BEGIN_NAMESPACE from the code and use the "classical" way.

Question 16

You're right, I should have made Sery::Buffer and Sery::Stream final since they are not meant to be inherited from.

Question 17

About the leading underscore, as you said, it doesn't break any rule. Thus, I don't feel bad about doing it. I like it in front of the name because it allows me to know instantly where it comes from when I read the code.

Question 18

Btw, if you don't trust #pragma once because it's non-standard, you should consider using it in combination with include guards. It saves compilation time. If #pragma once is supported, compiler will stop parsing the file as soon as the directive is found, which is not the case for include guards.

Question 19

reinterpret_cast to uint8* violates strict aliasing rules and lead to undefined behavior. Use char* as it is granted exception in the standard.

utnapistim utnapistim 3,62715 silver badges21 bronze badges · Accepted Answer · 2015-07-29 09:33:00Z

A few notes:

stop using C-style casts; Better alternative exists
stop wraping return expressions in parenthesis (every time I see that I think "this is a C-style value cast ... no wait!")
do not generate code using macros; the namespace macros should only be used if you are going to build on a platform that has no namespace support (are you? you didn't specify)
your setter and getter for GlobalEndian only set and get the value;

Better solution: consider setting the value itself public and removing the accessors (they don't add anything and effectively they expose the value as if it were public).

Even better solution: Ensure that each class has it's own (private) copy of the endianness flag and initialize the private copy upon construction (with a default value); Instances should have no public getter or setter. This is because you cannot change the endianness in the middle of writing to the stream and still expect the written data to be valid.

The way you arrange the code by columns makes it (marginally) easier to read, but over the lifetime of a project, you will either get some tokens out of sync with each other, or have to realign entire lists of functions when you change the length of an identifier; it is debatable if the extra ease in reading the code is worth it - because once you get used to reading untabulated code, the extra alignment adds nothing but extra maintenance effort).

Thanks for you comment, I really appreciate it! I'll answer in different comments to keep things clear. Firstly : about the C-style casts, you're totally right, I should avoid them. But since static_cast and reinterpret_cast are so long to write, I think about making proxies functions like s_c and r_c. But it really ruins the readability.
About return expressions wrapped in parenthesis, I am really used to it. Do you really think it is that bad? I think it really depends on what people are used to.
The casts are long by design. They are supposed to be large and explicit in code, making them easy to see. Ideally, you should write code that doesn't need them at all; when you use them in a lot of places, that is usually a sign that your architecture and APIs should be improved; As such, usually when you have casts, the solution is to refactor, not to alias them to something smaller (because that doesn't actually solve the problem).
Code generation using macro is quite useful when it comes to MetaProgramming. However, you are right, there is no point in wrapping the namespace declaration inside a macro like I did there. It simply prevented my IDE from indenting inside the namespace.
The purpose of the parenthesis is to allow you to write #define return (x)do_some_operation(x); return x (or something similar). It is a remnant of old C-style code, from an era when templates and classes didn't exist. In modern code they add nothing (but cruft) because RAII makes such code obsolete.

Stack Exchange Network

Binary serialization library

3 Answers 3

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Binary serialization library

3 Answers 3

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions