A while ago I posted the code for this library I'm working on, and have refactored the code quite a bit ever since. I would appreciate any feedback in regards to what I have so far to see what can I improve still.
binaryio/reader.h
#include <iostream>
#include <ranges>
#include <stdexcept>
#include <string>
#include <vector>
#include "binaryio/swap.h"
#ifndef BINARYIO_READER_H
#define BINARYIO_READER_H
namespace binaryio {
class BinaryReader {
std::istream& is;
public:
BinaryReader(std::istream& is) : is{is} {}
BinaryReader(std::istream& is, endian byte_order)
: is{is}, m_endian{byte_order} {}
void seek(std::size_t offset,
std::ios_base::seekdir seek_dir = std::ios_base::beg)
{
is.seekg(offset, seek_dir);
}
std::size_t tell() { return is.tellg(); }
template <typename T>
T read()
requires (!std::is_pointer_v<T> && std::is_trivially_copyable_v<T>)
{
T val;
_do_read(val);
return val;
}
template <typename T>
std::vector<T> read_many(const std::size_t count)
requires (!std::is_pointer_v<T> && std::is_trivially_copyable_v<T>)
{
std::vector<T> vals(count);
_do_read(vals);
return vals;
}
std::string
read_string(const std::string::size_type max_len = std::string::npos)
{
std::string str;
if (max_len == std::string::npos)
return str;
char arr[max_len];
is.getline(arr, max_len, '0円');
str = arr;
return str;
}
endian endianness() const { return m_endian; }
void set_endianness(endian new_endian) { m_endian = new_endian; }
void swap_endianness()
{
if (m_endian == endian::big)
m_endian = endian::little;
else
m_endian = endian::big;
}
private:
endian m_endian {endian::native};
template <typename T>
void _do_read(T& val)
{
if (!is.read(reinterpret_cast<char*>(&val), sizeof val))
throw std::out_of_range("out of bounds read");
swap_if_needed_in_place(val, m_endian);
}
template <class Range, typename T = typename Range::value_type>
void _do_read(Range& vals)
requires (std::ranges::output_range<Range, T>)
{
if (!is.read(reinterpret_cast<char*>(vals.data()), vals.size() * sizeof(T)))
throw std::out_of_range("out of bounds read");
for (auto& val : vals)
swap_if_needed_in_place(val, m_endian);
}
};
} // namespace binaryio
#endif
binaryio/swap.h
#include <bit>
#include <cstring>
#include <tuple>
#include <type_traits>
#include "binaryio/type_utils.h"
#ifndef BINARYIO_SWAP_H
#define BINARYIO_SWAP_H
namespace binaryio {
using endian = std::endian;
// Bitswap the bytes of a value
template <typename T>
constexpr void swap(T& value)
{
if (sizeof(T) == 1)
return;
std::array<char, sizeof(T)> data;
std::memcpy(data.data(), &value, sizeof(T));
for (int i{0}; i < sizeof(T)/2; ++i)
std::swap(data[sizeof(T) - 1 - i], data[i]);
std::memcpy(&value, data.data(), sizeof(T));
}
/// Swap a value if its endianness is not the same as the machine endianness.
/// @param endian The endianness of the value.
template <typename T>
void swap_if_needed_in_place(T& value, endian endian)
{
if (endian::native == endian)
return;
if constexpr (std::is_arithmetic<T>()) {
swap(value);
}
if constexpr (exposes_fields<T>) {
std::apply(
[endian](auto&... fields) {
(swap_if_needed_in_place(fields, endian), ...);
},
value.fields());
}
if constexpr (is_container<T>) {
for (auto& val : value)
swap(val);
}
}
template <typename T>
T swap_if_needed(T value, endian endian)
{
swap_if_needed_in_place(value, endian);
return value;
}
} // namespace binaryio
#endif
binaryio/type_utils.h
#include <string_view>
#include <tuple>
#include <type_traits>
#ifndef BINARYIO_TYPE_UTILS_H
#define BINARYIO_TYPE_UTILS_H
namespace binaryio {
#define BINARYIO_DEFINE_FIELDS(TYPE, ...) \
constexpr auto fields() { return std::tie(__VA_ARGS__); } \
constexpr auto fields() const { return std::tie(__VA_ARGS__); } \
template <typename T>
concept exposes_fields = requires (T a)
{
a.fields();
};
template <typename C>
concept is_container = requires (C a)
{
C::value_type;
C::reference;
C::const_reference;
C::iterator;
C::const_iterator;
C::difference_type;
C::size_type;
a.begin();
a.end();
a.cbegin();
a.size();
a.max_size();
a.empty();
};
} // namespace binaryio
#endif
binaryio/writer.h
#include <iostream>
#include <string_view>
#include <vector>
#include "binaryio/swap.h"
#ifndef BINARYIO_WRITER_H
#define BINARYIO_WRITER_H
namespace binaryio {
class BinaryWriter {
std::ostream& os;
public:
// Based on
// https://github.com/zeldamods/oead/blob/master/src/include/oead/util/binary_reader.h
BinaryWriter(std::ostream& os, endian byte_order)
: os{os}, m_endian{byte_order} {};
void seek(std::size_t offset,
std::ios_base::seekdir seek_dir = std::ios_base::beg)
{
os.seekp(offset, seek_dir);
}
std::size_t tell() { return os.tellp(); }
template <typename T>
void write(T value)
requires (!std::is_pointer_v<T> && std::is_trivially_copyable_v<T>)
{
swap_if_needed_in_place(value, m_endian);
os.write(reinterpret_cast<char*>(&value), sizeof(T));
}
template <class Range, typename T = typename Range::value_type>
void write_many(Range& values)
requires (std::ranges::output_range<Range, T>)
{
for (auto& value : values)
swap_if_needed_in_place(value, m_endian);
os.write(reinterpret_cast<char*>(values.begin().base()), values.size() * sizeof(T));
}
void write_string(std::string_view str)
{
os.write(str.data(), str.size());
}
endian endianness() { return m_endian; }
void set_endianness(endian new_endian) { m_endian = new_endian; }
void swap_endianness()
{
if (m_endian == endian::big)
m_endian = endian::little;
else
m_endian = endian::big;
}
private:
endian m_endian{endian::native};
};
} // namespace binaryio
#endif
Unit Test
#include <filesystem>
#include <fstream>
#include <iostream>
#include "binaryio/reader.h"
#include "binaryio/writer.h"
struct FileHeader {
uint32_t magic;
uint32_t fileSize;
BINARYIO_DEFINE_FIELDS(FileHeader, magic, fileSize);
};
struct WaveFile {
FileHeader riffHeader;
std::array<uint8_t, 4> magic;
std::array<uint8_t, 4> fmt;
uint32_t fmtSize;
uint16_t audioFormat;
uint16_t numChannels;
uint32_t sampleRate;
uint32_t byteRate;
uint16_t blockAlign;
uint16_t bitsPerSample;
uint8_t pad[0x8a];
std::array<uint8_t, 4> dataMagic;
uint32_t dataSize;
// Data starts
BINARYIO_DEFINE_FIELDS(WaveFile, riffHeader, magic, fmt, fmtSize,
audioFormat, numChannels, sampleRate, byteRate,
blockAlign, bitsPerSample, dataMagic, dataSize);
} __attribute__((packed));
int main(int argc, char** argv)
try {
// Read from the byte buffer
std::ifstream ifs{argv[1]};
binaryio::BinaryReader reader{ifs};
WaveFile wav{reader.read<WaveFile>()};
std::vector<char> data{reader.read_many<char>(wav.dataSize)};
ifs.close();
std::cout << "File 1 read" << '\n';
// Print to stdout to see what was read
std::cout << "Riff Magic: " << std::hex << wav.riffHeader.magic
<< '\n';
std::cout << "Wave Magic: " << wav.magic.data() << '\n';
// Write a new file
std::ofstream ofs{"out_litte.wav"};
binaryio::BinaryWriter writer{ofs, binaryio::endian::little};
writer.write(wav);
writer.write_many(data);
ofs.close();
std::cout << "File 2 written" << '\n';
// Write a different file with its endianness swapped
ofs.open("out_big.wav");
writer.swap_endianness();
writer.write(wav);
writer.write_many(data);
ofs.close();
std::cout << "File 3 written" << '\n';
// Read the new file, and compare the result with the original struct,
// treating the new values as big endian, since endianness was swapped
ifs.open("out_big.wav");
if (reader.read<uint32_t>() == 0x52494646) {
reader.swap_endianness();
ifs.seekg(0);
}
WaveFile other_wav{reader.read<WaveFile>()};
ifs.close();
std::cout << "File 3 read" << '\n';
std::cout << "Riff Magic: " << std::hex << other_wav.riffHeader.magic
<< '\n';
std::cout << "Wave Magic: " << other_wav.magic.data() << '\n';
if (wav.sampleRate == other_wav.sampleRate)
std::cout << "Data preserved, endianness swapped" << std::endl;
else
throw std::runtime_error(
"Something went wrong and the data was changed");
return 0;
} catch (std::runtime_error& err) {
std::cerr << err.what() << std::endl;
return 1;
}
```
1 Answer 1
Code Review:
Naming:
class BinaryReader {
std::istream& is;
// Lots of methods:
endian m_endian {endian::native};
Personal opinion:
I like to group all the member variables together (for me this helps me make sure that I set all members in the constructor).
Another personal opinion:
I don't like your naming scheme. One variable is prefixed by m_
but the other is simply the name (though a bit short - self documenting names is considered a good practice). Pick one style.
BinaryReader(std::istream& is, endian byte_order)
: is{is}, m_endian{byte_order} {}
Why is it called byte_order
as a parameter and m_endian
as a member? Seems a bit confusing.
Not sure I understand the logic of the default parameter here:
std::string
read_string(const std::string::size_type max_len = std::string::npos)
{
std::string str;
if (max_len == std::string::npos)
return str;
If you don't provide a parameter it simply returns a string?
This is technically not valid C++ (it's a common extension implemented by a lot of compilers):
char arr[max_len];
Also you make a copy of the array in a couple of lines:
str = arr;
Why not skip the copy? Create a string of the correct size and read directly into it.
// Code does not include error check. Left as exercise.
std::string arr(max_len, '0円');
is.getline(arr.data(), max_len, '0円');
arr.resize(is.gcount());
You are using the wrong Range
type here.
This is an input function so std::ranges::output_range
is incorrect (it should be std::ranges::input_range
).
template <class Range, typename T = typename Range::value_type>
void _do_read(Range& vals)
requires (std::ranges::output_range<Range, T>)
{
But input_range
is still not correct. You are not using the ++
operator to move from one element to the next. You are assuming that all the elements are contiguous to each other. So you should be using std::ranges::contiguous_range<T>
.
if (!is.read(reinterpret_cast<char*>(vals.data()), vals.size() * sizeof(T)))
throw std::out_of_range("out of bounds read");
Note this will only read max_len-1
characters from the stream.
is.getline(arr, max_len, '0円');
The last item in the array will be reserved for the null terminator.
Sure you can write it like this:
if (m_endian == endian::big)
m_endian = endian::little;
else
m_endian = endian::big;
Personally, I would do it as:
m_endian = (m_endian == endian::big) ? endian::little : endian::big;
You copy the data to and from a buffer. Why not swap in place?
template <typename T>
constexpr void swap(T& value)
{
if (sizeof(T) == 1)
return;
std::array<char, sizeof(T)> data;
std::memcpy(data.data(), &value, sizeof(T));
for (int i{0}; i < sizeof(T)/2; ++i)
std::swap(data[sizeof(T) - 1 - i], data[i]);
std::memcpy(&value, data.data(), sizeof(T));
}
I would simply do this:
template <typename T>
constexpr void swap(T& value)
{
char* data = reinterpret_cast<char*>(&value);
for (int i{0}; i < sizeof(T)/2; ++i)
std::swap(data[sizeof(T) - 1 - i], data[i]);
}
This is line is currently beyond me :-) I need to study but assume it is OK.
if constexpr (exposes_fields<T>) {
std::apply(
[endian](auto&... fields) {
(swap_if_needed_in_place(fields, endian), ...);
},
value.fields());
}
To prevent a copy, pass by reference and return the reference.
template <typename T>
T swap_if_needed(T value, endian endian)
{
swap_if_needed_in_place(value, endian);
return value;
}
Hold on. In the read version don't you look for a terminating 0円
character? In this case you should write that 0円
character to the output stream.
void write_string(std::string_view str)
{
os.write(str.data(), str.size()); // You need + 1 on the size.
}
require
is a pretty recent addition C++20. \$\endgroup\$