I needed to write zlib wrapper class for my application, so I can change it with different algorithm later if I want to do so.
Here is the simple version:
struct ZLibEasyCompressor{
bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
};
struct ZLibEasyDeCompressor{
bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
};
// ===========================
bool ZLibEasyCompressor::operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out){
if (data_in == nullptr || size_in == 0 || data_out == nullptr || size_out == 0)
return false;
z_stream zs;
memset(&zs, 0, sizeof(z_stream));
if ( deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK )
return false;
zs.next_in = reinterpret_cast<Bytef *>( const_cast<char *>( data_in ) );
zs.avail_in = static_cast<uInt>( size_in );
zs.next_out = reinterpret_cast<Bytef *>( data_out );
zs.avail_out = static_cast<uInt>( size_out );
int const result = deflate(&zs, Z_FINISH);
size_out = zs.total_out;
deflateEnd(&zs);
return result == Z_STREAM_END;
}
// ===========================
bool ZLibEasyDeCompressor::operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out){
if (data_in == nullptr || size_in == 0 || data_out == nullptr || size_out == 0)
return false;
z_stream zs;
memset(&zs, 0, sizeof(z_stream));
if ( inflateInit(&zs) != Z_OK )
return false;
zs.next_in = reinterpret_cast<Bytef *>( const_cast<char *>( data_in ) );
zs.avail_in = static_cast<uInt>( size_in );
zs.next_out = reinterpret_cast<Bytef *>( data_out );
zs.avail_out = static_cast<uInt>( size_out );
int const result = inflate(&zs, Z_FINISH);
size_out = zs.total_out;
inflateEnd(&zs);
return result == Z_STREAM_END;
}
This all works well, but:
- have repetition in the code that can be omit using templates or something.
- it initialize the control structure
z_stream
every time. As we can see later, this degrade the performance with 33%.
However, if I move z_stream
to header file, there is a huge problem - I need to include zlib.h
into the header and it will "spill" lots of constants and defines all around.
I wanted to avoid this at all costs:
https://stackoverflow.com/questions/39534366/how-to-keep-global-namespace-clean-from-third-party-lib
The dynamic allocation with opaque / void pointer can fix this, but there will be one more call for allocation / de-allocation.
This is why I did something crazy - I used a buffer + cast (we discussed it as placement new).
So here is the final version. It is:
- no runtime overhead. I compared against plain C version and speed is same.
- faster compression when you reuse the same class.
- 33% - 40% faster decompression when you reuse the same class (depends of optimization).
- no code repetition.
- will refuse to compile if something is changed in
z_stream
.
zlib.h
#ifndef ZLIB_COMPRESSOR_H_
#define ZLIB_COMPRESSOR_H_
#include <cstddef>
struct ZLibBase_{
constexpr static size_t BUFFER_SIZE = 128;
};
class ZLibCompressor : public ZLibBase_{
public:
const static int NO_COMPRESSION ;
const static int BEST_SPEED ;
const static int BEST_COMPRESSION ;
const static int DEFAULT_COMPRESSION ;
public:
ZLibCompressor(int compressionLevel = DEFAULT_COMPRESSION);
~ZLibCompressor();
bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
private:
char buffer_[BUFFER_SIZE];
};
// ===========================
class ZLibDeCompressor : public ZLibBase_{
public:
ZLibDeCompressor();
~ZLibDeCompressor();
bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
private:
char buffer_[BUFFER_SIZE];
};
#endif
zlib.cc
#include "zlib.h"
#include <stdexcept>
#include <cstring>
#include <zlib.h>
template<bool OP>
struct ZLibOpsBase_;
template<>
struct ZLibOpsBase_<true>{
static bool init(z_stream *zs, int const compressionLevel){
return deflateInit(zs, compressionLevel) == Z_OK;
}
static int done(z_stream *zs){
return deflateEnd(zs);
}
static int reset(z_stream *zs){
return deflateReset(zs);
}
static int process(z_stream *zs){
return deflate(zs, Z_FINISH);
}
};
// ===========================
template<>
struct ZLibOpsBase_<false>{
static bool init(z_stream *zs, int const){
return inflateInit(zs) == Z_OK;
}
static int done(z_stream *zs){
return inflateEnd(zs);
}
static int reset(z_stream *zs){
return inflateReset(zs);
}
static int process(z_stream *zs){
return inflate(zs, Z_FINISH);
}
};
// ===========================
template<bool OP>
struct ZLibOps_{
static void init(z_stream *zs, int const compressionLevel){
memset(zs, 0, sizeof(z_stream));
int const result = ZLibOpsBase_<OP>::init(zs, compressionLevel);
if (!result){
std::bad_alloc exception;
throw exception;
}
}
static int done(z_stream *zs){
return ZLibOpsBase_<OP>::done(zs);
}
static bool process(z_stream *zs, const char *data, size_t const size, char *data_out, size_t &size_out){
if (data == nullptr || size == 0 || data_out == nullptr || size_out == 0)
return false;
ZLibOpsBase_<OP>::reset(zs);
zs->next_in = reinterpret_cast<Bytef *>( const_cast<char *>( data ) );
zs->avail_in = static_cast<uInt>( size );
zs->next_out = reinterpret_cast<Bytef *>( data_out );
zs->avail_out = static_cast<uInt>( size_out );
int const result = ZLibOpsBase_<OP>::process(zs);
size_out = zs->total_out;
return result == Z_STREAM_END;
}
};
// ===========================
inline z_stream *cast_(char *buffer){
static_assert( sizeof(z_stream) < ZLibBase_::BUFFER_SIZE, "Increase the buffer");
return reinterpret_cast<z_stream *>(buffer);
}
// ===========================
const int ZLibCompressor::NO_COMPRESSION = Z_NO_COMPRESSION ;
const int ZLibCompressor::BEST_SPEED = Z_BEST_SPEED ;
const int ZLibCompressor::BEST_COMPRESSION = Z_BEST_COMPRESSION ;
const int ZLibCompressor::DEFAULT_COMPRESSION = Z_DEFAULT_COMPRESSION ;
ZLibCompressor::ZLibCompressor(int const compressionLevel){
ZLibOps_<true>::init(cast_(buffer_), compressionLevel);
}
ZLibCompressor::~ZLibCompressor(){
ZLibOps_<true>::done(cast_(buffer_));
}
bool ZLibCompressor::operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out){
return ZLibOps_<true>::process(cast_(buffer_), data_in, size_in, data_out, size_out);
}
// ===========================
ZLibDeCompressor::ZLibDeCompressor(){
ZLibOps_<false>::init(cast_(buffer_), 0);
}
ZLibDeCompressor::~ZLibDeCompressor(){
ZLibOps_<false>::done(cast_(buffer_));
}
bool ZLibDeCompressor::operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out){
return ZLibOps_<false>::process(cast_(buffer_), data_in, size_in, data_out, size_out);
}
Finally the example usage:
#if 0
#include "zlibeasy.h"
using Compressor = ZLibEasyCompressor;
using DeCompressor = ZLibEasyDeCompressor;
#else
#include "zlib.h"
using Compressor = ZLibCompressor;
using DeCompressor = ZLibDeCompressor;
#endif
#include <cstring>
#include <iostream>
#include <iomanip>
#include <cstdio>
constexpr size_t SIZE = 1024;
int main(){
Compressor compressor;
DeCompressor decompressor;
const char *odata = "Hello World";
size_t osize = strlen(odata);
char cbuffer[SIZE];
size_t csize = SIZE;
char dbuffer[SIZE];
size_t dsize = SIZE;
bool const cr = compressor(odata, osize, cbuffer, csize);
bool const dr = decompressor(cbuffer, csize, dbuffer, dsize);
std::cout << std::fixed << std::setprecision(2) << ( float(osize) / float(csize) ) << std::endl;
if ( ! cr || ! dr || osize != dsize || memcmp(odata, dbuffer, osize) ){
std::cout << "Error" << std::endl;
}
}
1 Answer 1
You called your header zlib.h
, when you know (and depend on) a zlib.h
in the standard include path? That's heading straight into confusion land - I don't recommend that.
constexpr static size_t BUFFER_SIZE = 128;
There's no definition in scope for size_t
. I guess this was intended to be std::size_t
?
Don't use all-caps for identifiers - we normally reserve those for preprocessor macros, which need special care because they are unscoped text substitutions.
class ZLibCompressor : public ZLibBase_{
Does public inheritance really make sense here? I think this might be better as private. And perhaps move ZLibBase_
to an inner namespace to separate it from the public interface.
const static int NO_COMPRESSION ; const static int BEST_SPEED ; const static int BEST_COMPRESSION ; const static int DEFAULT_COMPRESSION ;
These might be better as an enumeration.
ZLibCompressor(int compressionLevel = DEFAULT_COMPRESSION);
I don't think we want this to be an implicit conversion - I recommend explicit
here to avoid surprises.
bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
const
is unnecessary on the size_in
argument, which is passed by value. It doesn't contribute to the function signature.
Prefer returning values rather than accepting out-parameter references: instead of returning a bool
to indicate failure, prefer to return the size written, and throw if there's a failure (with the error message from the stream); if the size written is still important on failure, then perhaps return a pair or tuple that the the caller can unpack (e.g. with a structured binding).
Consider passing destination first, so the interface is more like std::memcpy()
and therefore easier to remember.
class ZLibDeCompressor : private ZLibBase_{
That's a surprising capitalisation of "decompressor" that I'll probably mistype endless times. There's no need for a capital in the middle of that word.
template<bool OP> struct ZLibOpsBase_;
I'm not a fan of using a bool
as behaviour switch here. Prefer a scoped enumerator, which gives a clear name to identify which instantiation is compressor and which is decompressor.
memset(zs, 0, sizeof(z_stream));
We've misspelt std::memset
here. And prefer to match the size with the destination object: sizeof *zs
.
In any case, writing all-bytes zero is not the correct way to initialise a z_stream
as it contains pointers and function pointers.
std::bad_alloc exception; throw exception;
No need for a variable - we can simply throw std::bad_alloc{}
.
if (data == nullptr || size == 0 || data_out == nullptr || size_out == 0)
I don't think it should be an error to specify zero-length input, at least for compression.
zs->avail_in = static_cast<uInt>( size );
Zlib's uInt
is a type alias for unsigned int
. This cast is not safe if size > UINT_MAX
, so we should check that and take appropriate action if it's larger (decompressing in chunks, for instance).
size_t osize = strlen(odata);
Here, we've misspelt std::strlen
as well as std::size_t
.
std::cout << std::fixed << std::setprecision(2) << ( float(osize) / float(csize) ) << std::endl;
We don't need to flush output, so use '\n'
rather than std::endl
.
std::cout << "Error" << std::endl;
This looks like an error, so we should use std::cerr << "Error\n";
.
The design looks over-complicated for something that's just meant to be a combination of opaque implementation and strategy pattern. We can do that with a simple despatch table and much less inheritance:
#ifndef ZLIB_COMPRESSOR_H_
#define ZLIB_COMPRESSOR_H_
#include <cstddef>
#include <memory>
namespace detail
{
struct impl;
class ZLibProcessor
{
protected:
std::unique_ptr<detail::impl> p;
explicit ZLibProcessor(std::unique_ptr<detail::impl>,int);
~ZLibProcessor();
std::size_t operator()(char *dst, std::size_t size,
const char *src, std::size_t len);
};
};
class ZLibCompressor : detail::ZLibProcessor
{
public:
enum class compression_level {
none,
fast,
best,
normal,
};
explicit ZLibCompressor(compression_level level = compression_level::normal);
explicit ZLibCompressor(int compressionLevel);
using ZLibProcessor::operator();
};
class ZLibDecompressor : detail::ZLibProcessor
{
public:
ZLibDecompressor();
using ZLibProcessor::operator();
};
#endif
#include "zlib.h"
#include <stdexcept>
#include <zlib.h>
struct ZLibOps {
int(*init)(z_stream*,int);
int(*done)(z_stream*);
int(*reset)(z_stream*);
int(*process)(z_stream*,int);
};
struct detail::impl
{
ZLibOps const& ops;
z_stream zs = {
.zalloc = Z_NULL,
.zfree = Z_NULL,
.opaque = nullptr
};
void init(int const compressionLevel)
{
switch (ops.init(&zs, compressionLevel)) {
case Z_OK:
return;
case Z_MEM_ERROR:
throw std::bad_alloc{};
default:
throw std::runtime_error{zs.msg};
}
}
int done() noexcept
{
return ops.done(&zs);
}
std::size_t process(char *dest, std::size_t size,
char const *src, std::size_t len)
{
ops.reset(&zs);
zs.next_out = reinterpret_cast<Bytef*>(dest);
zs.avail_out = static_cast<uInt>(size);
zs.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(src));
zs.avail_in = static_cast<uInt>(len);
// FIXME: loop until size and len are both less than UINT_MAX
int const result = ops.process(&zs, Z_FINISH);
switch (result) {
case Z_STREAM_END:
return zs.total_out;
case Z_MEM_ERROR:
throw std::bad_alloc{};
default:
throw std::runtime_error{zs.msg};
}
}
};
// Base processor
detail::ZLibProcessor::ZLibProcessor(std::unique_ptr<detail::impl> imp, int compressionLevel)
: p{std::move(imp)}
{
p->init(compressionLevel);
}
detail::ZLibProcessor::~ZLibProcessor()
{
p->done();
};
std::size_t detail::ZLibProcessor::operator()(char *dest, std::size_t size,
char const *src, std::size_t len) {
return p->process(dest, size, src, len);
}
// Compressor
static constexpr ZLibOps deflate_ops = {
.init = [](z_stream *zs,int level) { return deflateInit(zs, level); },
.done = deflateEnd,
.reset = deflateReset,
.process = deflate
};
static int convert(ZLibCompressor::compression_level level)
{
switch (level) {
case ZLibCompressor::compression_level::none: return Z_NO_COMPRESSION;
case ZLibCompressor::compression_level::fast: return Z_BEST_SPEED;
case ZLibCompressor::compression_level::best: return Z_BEST_COMPRESSION;
case ZLibCompressor::compression_level::normal: return Z_DEFAULT_COMPRESSION;
};
throw std::logic_error{"Invalid compression level"};
}
ZLibCompressor::ZLibCompressor(ZLibCompressor::compression_level level)
: ZLibCompressor{convert(level)}
{}
ZLibCompressor::ZLibCompressor(int compressionLevel)
: ZLibProcessor{std::make_unique<detail::impl>(deflate_ops), compressionLevel}
{}
// Decompressor
static constexpr ZLibOps inflate_ops = {
.init = [](z_stream *zs,int) { return inflateInit(zs); },
.done = inflateEnd,
.reset = inflateReset,
.process = inflate
};
ZLibDecompressor::ZLibDecompressor()
: ZLibProcessor{std::make_unique<detail::impl>(inflate_ops), 0}
{}
#include "zlib.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
constexpr std::size_t SIZE = 1024;
int main()
{
const char *odata = "Hello World";
const std::size_t osize = std::strlen(odata);
char cbuffer[SIZE];
const std::size_t csize = SIZE;
char dbuffer[SIZE];
const std::size_t dsize = SIZE;
try {
auto const cr = ZLibCompressor{}(cbuffer, csize, odata, osize);
auto const dr = ZLibDecompressor{}(dbuffer, dsize, cbuffer, csize);
// should recompress to same content
assert(dr == osize);
assert(!std::strncmp(odata, dbuffer, osize));
std::cout << std::fixed << std::setprecision(2) << (float(cr) / float(dr)) << '\n';
} catch (std::exception& e) {
std::cerr << e.what() << '\n';
return EXIT_FAILURE;
}
}
Explore related questions
See similar questions with these tags.