I am currently working on a binary serialization library written in C++11 for a personal project. I'd really like to have a review about my design, my implementation and everything else. The library is inspired from QDataStream
.
Here is the GitLab repository containing the project.
Sery/Misc.hh
#ifndef SERY_MISC_HH_
#define SERY_MISC_HH_
#define SERY_BEGIN_NAMESPACE namespace Sery {
#define SERY_END_NAMESPACE }
#include <cstdint>
#include <type_traits>
SERY_BEGIN_NAMESPACE
template< bool B, class T = void >
using enable_if_t = typename ::std::enable_if<B, T>::type;
enum Endian
{
LittleEndian,
BigEndian
};
typedef std::int8_t int8;
typedef std::int16_t int16;
typedef std::int32_t int32;
typedef std::int64_t int64;
typedef std::uint8_t uint8;
typedef std::uint16_t uint16;
typedef std::uint32_t uint32;
typedef std::uint64_t uint64;
namespace detail
{
inline Endian getSoftwareEndian()
{
int16 witness = 0x5501;
int8 test = *((int8*)&witness);
return (test == 1 ? Endian::LittleEndian : Endian::BigEndian);
}
}
SERY_END_NAMESPACE
#endif // SERY_MISC_HH_
Sery/Buffer.hh
#ifndef SERY_BUFFER_HH_
#define SERY_BUFFER_HH_
#include <vector>
#include <Sery/IBuffer.hh>
SERY_BEGIN_NAMESPACE
class Buffer : public IBuffer
{
public:
Buffer();
Buffer(const char *buffer, uint32 size);
~Buffer();
public:
virtual void writeRaw(const char *buffer, uint32 size) final;
virtual void readRaw(char *buffer, uint32 size) final;
virtual uint32 size() const final;
virtual const char *data() const final;
private:
std::vector<char> _buffer;
};
SERY_END_NAMESPACE
#endif // SERY_BUFFER_HH_
Sery/Buffer.cpp
#include <Sery/Buffer.hh>
#include <sstream>
#include <iomanip>
#include <string.h>
SERY_BEGIN_NAMESPACE
Buffer::Buffer()
: _buffer()
{
}
Buffer::Buffer(const char *buffer, uint32 size)
: _buffer(buffer, buffer + size)
{
}
Buffer::~Buffer()
{
}
void Buffer::writeRaw(const char *buffer, uint32 size)
{
_buffer.insert(_buffer.end(), buffer, buffer + size);
}
void Buffer::readRaw(char *buffer, uint32 size)
{
memcpy(buffer, _buffer.data(), size);
_buffer.erase(_buffer.begin(), _buffer.begin() + size);
}
uint32 Buffer::size() const
{
return ((uint32)_buffer.size());
}
const char *Buffer::data() const
{
return (_buffer.data());
}
SERY_END_NAMESPACE
Sery/Stream.hh
#ifndef SERY_STREAM_HH_
#define SERY_STREAM_HH_
#include <Sery/Misc.hh>
SERY_BEGIN_NAMESPACE
class IBuffer;
class Stream
{
private:
// Removing copy and move functions
Stream(const Stream&) = delete;
Stream(Stream&&) = delete;
Stream &operator=(const Stream&) = delete;
Stream &operator=(Stream&&) = delete;
private:
static Endian globalEndian;
public:
static void setGlobalEndian(Endian endian);
static Endian getGlobalEndian();
public:
// Must be constructed with an IBuffer
Stream(IBuffer &buffer);
~Stream();
public:
// Proxies for lower level manipulation
Stream &writeRaw(const char *buffer, uint32 size);
Stream &readRaw(char *buffer, uint32 size);
public:
void setLocalEndian(Endian endian);
Endian getLocalEndian() const;
private:
IBuffer &_buffer;
Endian _localEndian;
};
// (De)Serialization of a C-Style string
Stream &operator<<(Stream &stream, const char *str);
Stream &operator>>(Stream &stream, char *&str);
// (De)Serialization of all arithmetic types
template <class T, enable_if_t<std::is_arithmetic<T>::value> * = nullptr>
Stream &operator<<(Stream &stream, T t);
template <class T, enable_if_t<std::is_arithmetic<T>::value> * = nullptr>
Stream &operator>>(Stream &stream, T &t);
SERY_END_NAMESPACE
#include "detail/Stream_STD.hh" // Contains serialization for standard library
#endif // SERY_STREAM_HH_
Sery/Stream.cpp
#include <Sery/Stream.hh>
#include <Sery/IBuffer.hh>
#include <cstring>
#include <iostream>
SERY_BEGIN_NAMESPACE
Stream::Stream(IBuffer &buffer)
: _buffer(buffer),
_localEndian(Stream::globalEndian)
{
}
Stream::~Stream()
{
}
Stream &Stream::writeRaw(const char *buffer, uint32 size)
{
_buffer.writeRaw(buffer, size);
return (*this);
}
Stream &Stream::readRaw(char *buffer, uint32 size)
{
_buffer.readRaw(buffer, size);
return (*this);
}
void Stream::setLocalEndian(Endian endian)
{
_localEndian = endian;
}
Endian Stream::getLocalEndian() const
{
return (_localEndian);
}
// Static members
Endian Stream::globalEndian = Endian::BigEndian;
void Stream::setGlobalEndian(Endian endian)
{
globalEndian = endian;
}
Endian Stream::getGlobalEndian()
{
return (globalEndian);
}
//////////////////////////////////////////
// External functions for serialization //
//////////////////////////////////////////
// Templates for serializing arithmetics types
template <class T, enable_if_t<std::is_arithmetic<T>::value> *>
Stream &operator<<(Stream &stream, T t)
{
Endian softwareEndian = detail::getSoftwareEndian();
Endian currentEndian = stream.getLocalEndian();
char buffer[sizeof(T)];
const uint8 *p = reinterpret_cast<const uint8 *>(&t);
for (size_t index = 0;
index < sizeof(T);
++index)
{
if (currentEndian == softwareEndian)
buffer[index] = *p++;
else
buffer[sizeof(T) - index - 1] = *p++;
}
stream.writeRaw(buffer, sizeof(T));
return (stream);
}
// Explicit instantiations of templates functions
template Stream &operator<< <int8> (Stream &, int8);
template Stream &operator<< <int16> (Stream &, int16);
template Stream &operator<< <int32> (Stream &, int32);
template Stream &operator<< <int64> (Stream &, int64);
template Stream &operator<< <uint8> (Stream &, uint8);
template Stream &operator<< <uint16> (Stream &, uint16);
template Stream &operator<< <uint32> (Stream &, uint32);
template Stream &operator<< <uint64> (Stream &, uint64);
template Stream &operator<< <float> (Stream &, float);
template Stream &operator<< <double> (Stream &, double);
template Stream &operator<< <long double> (Stream &, long double);
// Templates for deserializing arithmetics types
template <class T, enable_if_t<std::is_arithmetic<T>::value> *>
Stream &operator>>(Stream &stream, T &t)
{
Endian softwareEndian = detail::getSoftwareEndian();
Endian currentEndian = stream.getLocalEndian();
char buffer[sizeof(T)];
stream.readRaw(buffer, sizeof(T));
uint8 *p = reinterpret_cast<uint8 *>(&t);
for (size_t index = 0;
index < sizeof(T);
++index)
{
if (currentEndian == softwareEndian)
*p++ = buffer[index];
else
*p++ = buffer[sizeof(T) - index - 1];
}
return (stream);
}
// Explicit instantiations of templates functions
template Stream &operator>> <int8> (Stream &, int8 &);
template Stream &operator>> <int16> (Stream &, int16 &);
template Stream &operator>> <int32> (Stream &, int32 &);
template Stream &operator>> <int64> (Stream &, int64 &);
template Stream &operator>> <uint8> (Stream &, uint8 &);
template Stream &operator>> <uint16> (Stream &, uint16 &);
template Stream &operator>> <uint32> (Stream &, uint32 &);
template Stream &operator>> <uint64> (Stream &, uint64 &);
template Stream &operator>> <float> (Stream &, float &);
template Stream &operator>> <double> (Stream &, double &);
template Stream &operator>> <long double> (Stream &, long double &);
// (De)Serialization of C-Style strings
Stream &operator<<(Stream &stream, const char *str)
{
uint32 len = (uint32)std::strlen(str) + 1;
stream << len;
stream.writeRaw(str, len);
return (stream);
}
Stream &operator>>(Stream &stream, char *&str)
{
uint32 len = 0;
stream >> len;
char *buffer = new char[len];
stream.readRaw(buffer, len);
str = buffer;
return (stream);
}
SERY_END_NAMESPACE
3 Answers 3
A few notes:
- stop using C-style casts; Better alternative exists
- stop wraping return expressions in parenthesis (every time I see that I think "this is a C-style value cast ... no wait!")
- do not generate code using macros; the namespace macros should only be used if you are going to build on a platform that has no namespace support (are you? you didn't specify)
your setter and getter for GlobalEndian only set and get the value;
Better solution: consider setting the value itself public and removing the accessors (they don't add anything and effectively they expose the value as if it were public).
Even better solution: Ensure that each class has it's own (private) copy of the endianness flag and initialize the private copy upon construction (with a default value); Instances should have no public getter or setter. This is because you cannot change the endianness in the middle of writing to the stream and still expect the written data to be valid.
The way you arrange the code by columns makes it (marginally) easier to read, but over the lifetime of a project, you will either get some tokens out of sync with each other, or have to realign entire lists of functions when you change the length of an identifier; it is debatable if the extra ease in reading the code is worth it - because once you get used to reading untabulated code, the extra alignment adds nothing but extra maintenance effort).
-
\$\begingroup\$ Thanks for you comment, I really appreciate it! I'll answer in different comments to keep things clear. Firstly : about the C-style casts, you're totally right, I should avoid them. But since
static_cast
andreinterpret_cast
are so long to write, I think about making proxies functions likes_c
andr_c
. But it really ruins the readability. \$\endgroup\$Telokis– Telokis2015年07月29日 11:32:29 +00:00Commented Jul 29, 2015 at 11:32 -
\$\begingroup\$ About return expressions wrapped in parenthesis, I am really used to it. Do you really think it is that bad? I think it really depends on what people are used to. \$\endgroup\$Telokis– Telokis2015年07月29日 11:34:31 +00:00Commented Jul 29, 2015 at 11:34
-
\$\begingroup\$ The casts are long by design. They are supposed to be large and explicit in code, making them easy to see. Ideally, you should write code that doesn't need them at all; when you use them in a lot of places, that is usually a sign that your architecture and APIs should be improved; As such, usually when you have casts, the solution is to refactor, not to alias them to something smaller (because that doesn't actually solve the problem). \$\endgroup\$utnapistim– utnapistim2015年07月29日 11:35:21 +00:00Commented Jul 29, 2015 at 11:35
-
\$\begingroup\$ Code generation using macro is quite useful when it comes to MetaProgramming. However, you are right, there is no point in wrapping the namespace declaration inside a macro like I did there. It simply prevented my IDE from indenting inside the namespace. \$\endgroup\$Telokis– Telokis2015年07月29日 11:36:03 +00:00Commented Jul 29, 2015 at 11:36
-
\$\begingroup\$ The purpose of the parenthesis is to allow you to write
#define return (x)do_some_operation(x); return x
(or something similar). It is a remnant of old C-style code, from an era when templates and classes didn't exist. In modern code they add nothing (but cruft) because RAII makes such code obsolete. \$\endgroup\$utnapistim– utnapistim2015年07月29日 11:39:06 +00:00Commented Jul 29, 2015 at 11:39
Another few notes:
- You can use
#pragma once
instead of include guards. This is a non-standard but widely supported preprocessor directive. Most of the modern compilers support it. - IMHO,
namespace Sery { }
is much more clearer thanSERY_BEGIN_NAMESPACE / SERY_END_NAMESPACE
. Also consider writing} // namespace Sery
at the bottom of the file for clarity. - Consider marking Buffer class as final. Otherwise your
Buffer::~Buffer()
should have been either either public and virtual, or protected and non-virtual (Sutter's rule). The same forStream
class. - I also agree with @utnapistim about code formatting.
- The leading underscores for private members do not break any rules, but it is close to it (e.g. if you start an identifier with a capital letter). Consider using trailing underscore for that matter. You'll get used to it soon.
-
2\$\begingroup\$ I don't really like
pragma once
because it is non-standard. I try to avoid non-standard things as much as possible. \$\endgroup\$Telokis– Telokis2015年07月31日 08:44:46 +00:00Commented Jul 31, 2015 at 8:44 -
\$\begingroup\$ Yeah, I will remove the
#define SERY_BEGIN_NAMESPACE
from the code and use the "classical" way. \$\endgroup\$Telokis– Telokis2015年07月31日 08:45:14 +00:00Commented Jul 31, 2015 at 8:45 -
\$\begingroup\$ You're right, I should have made
Sery::Buffer
andSery::Stream
final since they are not meant to be inherited from. \$\endgroup\$Telokis– Telokis2015年07月31日 08:45:48 +00:00Commented Jul 31, 2015 at 8:45 -
\$\begingroup\$ About the leading underscore, as you said, it doesn't break any rule. Thus, I don't feel bad about doing it. I like it in front of the name because it allows me to know instantly where it comes from when I read the code. \$\endgroup\$Telokis– Telokis2015年07月31日 08:47:22 +00:00Commented Jul 31, 2015 at 8:47
-
2\$\begingroup\$ Btw, if you don't trust
#pragma once
because it's non-standard, you should consider using it in combination with include guards. It saves compilation time. If#pragma once
is supported, compiler will stop parsing the file as soon as the directive is found, which is not the case for include guards. \$\endgroup\$MaksymB– MaksymB2015年07月31日 10:38:20 +00:00Commented Jul 31, 2015 at 10:38
reinterpret_cast to uint8*
violates strict aliasing rules and lead to undefined behavior. Use char*
as it is granted exception in the standard.
htonl()
and family. In addition toEndianess
usally floating point values are not stored in IEEE-754 Interchange formats internally. They are usually IEEE-754 but one of the "Extended precision formats" and you can not translate directly to other platforms binary formats. You should convert to one of the standard Interchange formats before writing to your stream. \$\endgroup\$long double
is 128 bits but only 96 bits are used (because that is the size of the the FP register. On the virtual unix machine I run on my mac. The `long double is 128 bits. But only 80 bits are used. See: github.com/Loki-Astari/ThorsSerializer/blob/master/src/… \$\endgroup\$Sery::globalEndian
beingBigEndian
by default. \$\endgroup\$std::numeric_limits<T>::is_iec559
. Then check the size of your float withsizeof()
then check and the number of digits.std::numeric_limits<T>::digits
. With the size and number of digits you can check if is an IEEE-754 interchange format. If it is then you don't need to do anything. If it is not then unless you really want the support just throw an error. This will wolve 99.99% of situations. \$\endgroup\$