I wrote a simple library for reading RIFF-encoded files in C++.
I tried to adhere to modern practices and provide an idiomatic API, but there is one part that sticks like a sore thumb to me: the way you get the contents of a "chunk" is by calling a data()
method, which returns a std::vector<char>
. Most of the other ways of accessing the data do not require full reading from the stream, this is the only exception.
I'd like to know if there is a better way of accomplishing that.
riffcpp.hpp:
#ifndef RIFFCPP_H
#define RIFFCPP_H
#include <array>
#include <cstdint>
#include <istream>
#include <iterator>
#include <vector>
namespace riffcpp {
/**
Represents a FourCC
This is a sequence of four bytes used to identify the various types of RIFF
chunks
*/
using FourCC = std::array<char, 4>;
/// The `RIFF` FourCC, used to identify toplevel chunks
constexpr FourCC riff_id = {'R', 'I', 'F', 'F'};
/// The `LIST` FourCC, used to identify chunks that contain other chunks
constexpr FourCC list_id = {'L', 'I', 'S', 'T'};
class ChunkIt;
/**
Represents a RIFF chunk
Every chunk has a four byte identifier (FourCC) and some contents.
Depending on the value of the identifier, the chunk may contain other chunks
as its contents, and in those cases a second FourCC is used to distinguish
the chunk type.
*/
class Chunk {
std::istream &m_stream;
std::streampos m_pos;
public:
/**
Reads a chunk from the specified stream position
The chunk's data is not read initially, it is only loaded when requested
via the various methods provided.
The stream needs to be able to seek to arbitrary positions.
*/
Chunk(std::istream &stream, std::streampos pos);
/// The chunk's identifier
FourCC id();
/// If the chunk contains other chunks, this is its type FourCC
FourCC type();
/// Returns the size of the chunk's contents in bytes
std::uint32_t size();
/**
If this chunk contains other chunks, returns an iterator to the first
chunk contained
`no_chunk_id` is used for chunks which have no chunk id but still contain
subchunks, like `seqt` from DirectMusic
*/
ChunkIt begin(bool no_chunk_id = false);
/**
If this chunk contains other chunks, returns an iterator pointing past the
last chunk contained
*/
ChunkIt end();
/**
Returns the raw contents of the chunk
*/
std::vector<char> data();
};
/**
Provides a way to iterate over subchunks
*/
class ChunkIt {
std::streampos m_pos; ///< Position of the chunk in the stream
std::istream &m_stream; ///< Stream of the chunk
public:
/// Creates an iterator starting from the specified stream position
ChunkIt(std::istream &stream, std::streampos pos);
/// Returns whether two iterators point to the same chunk
bool operator==(const ChunkIt &a) const;
/// Returns whether two iterators do not point to the same chunk
bool operator!=(const ChunkIt &a) const;
/// Returns the chunk pointed by the iterator
Chunk operator*() const;
/// Moves the iterator ahead, to point to the following iterator
ChunkIt &operator++();
/**
Moves the iterator ahead, to point to the following iterator and returns
an iterator to the current position
*/
ChunkIt operator++(int);
};
} // namespace riffcpp
namespace std {
template <> struct iterator_traits<riffcpp::ChunkIt> {
using value_type = riffcpp::Chunk;
using pointer = riffcpp::Chunk *;
using iterator_category = std::input_iterator_tag;
};
} // namespace std
#endif // RIFFCPP_H
riffcpp.cpp:
#include <riffcpp.hpp>
riffcpp::Chunk::Chunk(std::istream &stream, std::streampos pos)
: m_stream(stream), m_pos(pos) {}
riffcpp::FourCC riffcpp::Chunk::id() {
m_stream.seekg(m_pos);
riffcpp::FourCC read_id;
m_stream.read(read_id.data(), read_id.size());
return read_id;
}
std::uint32_t riffcpp::Chunk::size() {
std::streamoff offs{4};
m_stream.seekg(m_pos + offs);
uint32_t read_size;
m_stream.read(reinterpret_cast<char *>(&read_size), 4);
return read_size;
}
riffcpp::FourCC riffcpp::Chunk::type() {
std::streamoff offs{8};
m_stream.seekg(m_pos + offs);
riffcpp::FourCC read_type;
m_stream.read(read_type.data(), read_type.size());
return read_type;
}
std::vector<char> riffcpp::Chunk::data() {
std::streamoff offs{8};
m_stream.seekg(m_pos + offs);
std::uint32_t data_size = size();
std::vector<char> read_data;
read_data.resize(data_size);
m_stream.read(read_data.data(), data_size);
return read_data;
}
riffcpp::ChunkIt riffcpp::Chunk::begin(bool no_chunk_id) {
std::streamoff offs{no_chunk_id ? 8 : 12};
return riffcpp::ChunkIt(m_stream, m_pos + offs);
}
riffcpp::ChunkIt riffcpp::Chunk::end() {
std::uint32_t sz = size();
std::streamoff offs{sz + sz % 2 + 8};
return riffcpp::ChunkIt(m_stream, m_pos + offs);
}
riffcpp::ChunkIt::ChunkIt(std::istream &stream, std::streampos pos)
: m_stream(stream), m_pos(pos) {}
bool riffcpp::ChunkIt::operator==(const ChunkIt &a) const {
return m_pos == a.m_pos;
}
bool riffcpp::ChunkIt::operator!=(const ChunkIt &a) const {
return !(*this == a);
}
riffcpp::Chunk riffcpp::ChunkIt::operator*() const {
return riffcpp::Chunk(m_stream, m_pos);
}
riffcpp::ChunkIt &riffcpp::ChunkIt::operator++() {
riffcpp::Chunk chunk(m_stream, m_pos);
std::uint32_t sz = chunk.size();
std::streamoff offs{sz + sz % 2 + 8};
m_pos += offs;
return *this;
}
riffcpp::ChunkIt riffcpp::ChunkIt::operator++(int) {
riffcpp::ChunkIt it(m_stream, m_pos);
riffcpp::Chunk chunk(m_stream, m_pos);
std::uint32_t sz = chunk.size();
std::streamoff offs{sz + sz % 2 + 8};
m_pos += offs;
return it;
}
Example usage:
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <riffcpp.hpp>
void print_indent(int indent) {
for (int j = 0; j < indent; j++) {
std::cout << " ";
}
}
void print_hex_dump(std::vector<char> &data, int indent) {
int i = 0;
for (char c : data) {
if (i % 16 == 0) {
print_indent(indent);
}
std::cout << std::setfill('0') << std::setw(2) << std::hex
<< (int)((unsigned char)c) << ' ';
if (i % 16 == 15) {
std::cout << '\n';
}
i++;
}
if (i % 16 != 0) {
i = i % 16;
for (; i < 16; i++) {
std::cout << "-- ";
}
}
std::cout << std::dec << '\n';
}
void print_chunks(riffcpp::Chunk &ch, int offs = 0) {
auto id = ch.id(); // Reads the chunk's id
auto size = ch.size(); // Reads the chunk's size
if (id == riffcpp::riff_id || id == riffcpp::list_id) {
// The chunk is either a 'RIFF' or a 'LIST', so it contains subchunks
print_indent(offs);
auto type = ch.type(); // Reads the chunk's type
std::cout << std::string(id.data(), 4) << " " << std::string(type.data(), 4)
<< " size: " << size << "\n";
// Iterate subchunks
for (auto ck : ch) {
print_chunks(ck, offs + 1);
}
} else {
// The chunk is an unknown type, provide an hexdump
auto data = ch.data();
print_indent(offs);
std::cout << std::string(id.data(), 4) << " size: " << size << "\n";
print_hex_dump(data, offs + 1);
}
}
int main(int argc, char *argv[]) {
std::ifstream stream(argv[1], std::ios::binary);
// Read the chunk from the current position
riffcpp::Chunk ch(stream, stream.tellg());
print_chunks(ch);
}
Link to GitHub repo: https://github.com/frabert/riffcpp
1 Answer 1
There's a serious bug here:
uint32_t read_size;
m_stream.read(reinterpret_cast<char *>(&read_size), 4);
When this code is compiled for a target whose endianness matches the file endianness, I can imagine this working. However, when the endianness is opposite, you'll have results that look very different to what's expected.
Instead of specialising std::iterator_traits
, it's usually much easier to just provide suitable member types and let the unspecialised template just do its thing:
class ChunkIt {
using value_type = riffcpp::Chunk;
using reference = value_type&;
using pointer = value_type*;
using difference_type = std::ptrdiff_t;
using iterator_category = std::input_iterator_tag;
...
};
It's conventional to make the iterator an inner type Chunk::iterator
. It might be a good idea to include a const_iterator
type, too.
The code duplication in the iterator can be reduced. For example, we should implement post-increment in terms of pre-increment and a temporary.
-
\$\begingroup\$ The endianness bug was already known to me, but I couldn't find a workaround that was both standards-compliant and not too longwinded. Regarding the
const_iterator
, I thought about it for a minute but couldn't understrand how to implement it, given that moving the iterator involves mutating the underlying stream... Any suggestion regarding thedata()
method in Chunk? \$\endgroup\$Francesco Bertolaccini– Francesco Bertolaccini2019年04月03日 15:15:02 +00:00Commented Apr 3, 2019 at 15:15