4
\$\begingroup\$

I needed to write zlib wrapper class for my application, so I can change it with different algorithm later if I want to do so.

Here is the simple version:

struct ZLibEasyCompressor{
 bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
};
struct ZLibEasyDeCompressor{
 bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
};
// ===========================
bool ZLibEasyCompressor::operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out){
 if (data_in == nullptr || size_in == 0 || data_out == nullptr || size_out == 0)
 return false;
 z_stream zs;
 memset(&zs, 0, sizeof(z_stream));
 if ( deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK )
 return false;
 zs.next_in = reinterpret_cast<Bytef *>( const_cast<char *>( data_in ) );
 zs.avail_in = static_cast<uInt>( size_in );
 zs.next_out = reinterpret_cast<Bytef *>( data_out );
 zs.avail_out = static_cast<uInt>( size_out );
 int const result = deflate(&zs, Z_FINISH);
 size_out = zs.total_out;
 deflateEnd(&zs);
 return result == Z_STREAM_END;
}
// ===========================
bool ZLibEasyDeCompressor::operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out){
 if (data_in == nullptr || size_in == 0 || data_out == nullptr || size_out == 0)
 return false;
 z_stream zs;
 memset(&zs, 0, sizeof(z_stream));
 if ( inflateInit(&zs) != Z_OK )
 return false;
 zs.next_in = reinterpret_cast<Bytef *>( const_cast<char *>( data_in ) );
 zs.avail_in = static_cast<uInt>( size_in );
 zs.next_out = reinterpret_cast<Bytef *>( data_out );
 zs.avail_out = static_cast<uInt>( size_out );
 int const result = inflate(&zs, Z_FINISH);
 size_out = zs.total_out;
 inflateEnd(&zs);
 return result == Z_STREAM_END;
}

This all works well, but:

  • have repetition in the code that can be omit using templates or something.
  • it initialize the control structure z_stream every time. As we can see later, this degrade the performance with 33%.

However, if I move z_stream to header file, there is a huge problem - I need to include zlib.h into the header and it will "spill" lots of constants and defines all around.

I wanted to avoid this at all costs:

https://stackoverflow.com/questions/39534366/how-to-keep-global-namespace-clean-from-third-party-lib

The dynamic allocation with opaque / void pointer can fix this, but there will be one more call for allocation / de-allocation.

This is why I did something crazy - I used a buffer + cast (we discussed it as placement new).

So here is the final version. It is:

  • no runtime overhead. I compared against plain C version and speed is same.
  • faster compression when you reuse the same class.
  • 33% - 40% faster decompression when you reuse the same class (depends of optimization).
  • no code repetition.
  • will refuse to compile if something is changed in z_stream.

zlib.h

#ifndef ZLIB_COMPRESSOR_H_
#define ZLIB_COMPRESSOR_H_
#include <cstddef>
struct ZLibBase_{
 constexpr static size_t BUFFER_SIZE = 128;
};
class ZLibCompressor : public ZLibBase_{
public:
 const static int NO_COMPRESSION ;
 const static int BEST_SPEED ;
 const static int BEST_COMPRESSION ;
 const static int DEFAULT_COMPRESSION ;
public:
 ZLibCompressor(int compressionLevel = DEFAULT_COMPRESSION);
 ~ZLibCompressor();
 bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
private:
 char buffer_[BUFFER_SIZE];
};
// ===========================
class ZLibDeCompressor : public ZLibBase_{
public:
 ZLibDeCompressor();
 ~ZLibDeCompressor();
 bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);
private:
 char buffer_[BUFFER_SIZE];
};
#endif

zlib.cc

#include "zlib.h"
#include <stdexcept>
#include <cstring>
#include <zlib.h>
template<bool OP>
struct ZLibOpsBase_;
template<>
struct ZLibOpsBase_<true>{
 static bool init(z_stream *zs, int const compressionLevel){
 return deflateInit(zs, compressionLevel) == Z_OK;
 }
 static int done(z_stream *zs){
 return deflateEnd(zs);
 }
 static int reset(z_stream *zs){
 return deflateReset(zs);
 }
 static int process(z_stream *zs){
 return deflate(zs, Z_FINISH);
 }
};
// ===========================
template<>
struct ZLibOpsBase_<false>{
 static bool init(z_stream *zs, int const){
 return inflateInit(zs) == Z_OK;
 }
 static int done(z_stream *zs){
 return inflateEnd(zs);
 }
 static int reset(z_stream *zs){
 return inflateReset(zs);
 }
 static int process(z_stream *zs){
 return inflate(zs, Z_FINISH);
 }
};
// ===========================
template<bool OP>
struct ZLibOps_{
 static void init(z_stream *zs, int const compressionLevel){
 memset(zs, 0, sizeof(z_stream));
 int const result = ZLibOpsBase_<OP>::init(zs, compressionLevel);
 if (!result){
 std::bad_alloc exception;
 throw exception;
 }
 }
 static int done(z_stream *zs){
 return ZLibOpsBase_<OP>::done(zs);
 }
 static bool process(z_stream *zs, const char *data, size_t const size, char *data_out, size_t &size_out){
 if (data == nullptr || size == 0 || data_out == nullptr || size_out == 0)
 return false;
 ZLibOpsBase_<OP>::reset(zs);
 zs->next_in = reinterpret_cast<Bytef *>( const_cast<char *>( data ) );
 zs->avail_in = static_cast<uInt>( size );
 zs->next_out = reinterpret_cast<Bytef *>( data_out );
 zs->avail_out = static_cast<uInt>( size_out );
 int const result = ZLibOpsBase_<OP>::process(zs);
 size_out = zs->total_out;
 return result == Z_STREAM_END;
 }
};
// ===========================
inline z_stream *cast_(char *buffer){
 static_assert( sizeof(z_stream) < ZLibBase_::BUFFER_SIZE, "Increase the buffer");
 return reinterpret_cast<z_stream *>(buffer);
}
// ===========================
const int ZLibCompressor::NO_COMPRESSION = Z_NO_COMPRESSION ;
const int ZLibCompressor::BEST_SPEED = Z_BEST_SPEED ;
const int ZLibCompressor::BEST_COMPRESSION = Z_BEST_COMPRESSION ;
const int ZLibCompressor::DEFAULT_COMPRESSION = Z_DEFAULT_COMPRESSION ;
ZLibCompressor::ZLibCompressor(int const compressionLevel){
 ZLibOps_<true>::init(cast_(buffer_), compressionLevel);
}
ZLibCompressor::~ZLibCompressor(){
 ZLibOps_<true>::done(cast_(buffer_));
}
bool ZLibCompressor::operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out){
 return ZLibOps_<true>::process(cast_(buffer_), data_in, size_in, data_out, size_out);
}
// ===========================
ZLibDeCompressor::ZLibDeCompressor(){
 ZLibOps_<false>::init(cast_(buffer_), 0);
}
ZLibDeCompressor::~ZLibDeCompressor(){
 ZLibOps_<false>::done(cast_(buffer_));
}
bool ZLibDeCompressor::operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out){
 return ZLibOps_<false>::process(cast_(buffer_), data_in, size_in, data_out, size_out);
}

Finally the example usage:

#if 0
#include "zlibeasy.h"
using Compressor = ZLibEasyCompressor;
using DeCompressor = ZLibEasyDeCompressor;
#else
#include "zlib.h"
using Compressor = ZLibCompressor;
using DeCompressor = ZLibDeCompressor;
#endif
#include <cstring>
#include <iostream>
#include <iomanip>
#include <cstdio>
constexpr size_t SIZE = 1024;
int main(){
 Compressor compressor;
 DeCompressor decompressor;
 const char *odata = "Hello World";
 size_t osize = strlen(odata);
 char cbuffer[SIZE];
 size_t csize = SIZE;
 char dbuffer[SIZE];
 size_t dsize = SIZE;
 bool const cr = compressor(odata, osize, cbuffer, csize);
 bool const dr = decompressor(cbuffer, csize, dbuffer, dsize);
 std::cout << std::fixed << std::setprecision(2) << ( float(osize) / float(csize) ) << std::endl;
 if ( ! cr || ! dr || osize != dsize || memcmp(odata, dbuffer, osize) ){
 std::cout << "Error" << std::endl;
 }
}
asked Sep 17, 2016 at 10:13
\$\endgroup\$

1 Answer 1

2
\$\begingroup\$

You called your header zlib.h, when you know (and depend on) a zlib.h in the standard include path? That's heading straight into confusion land - I don't recommend that.


 constexpr static size_t BUFFER_SIZE = 128;

There's no definition in scope for size_t. I guess this was intended to be std::size_t?

Don't use all-caps for identifiers - we normally reserve those for preprocessor macros, which need special care because they are unscoped text substitutions.


class ZLibCompressor : public ZLibBase_{

Does public inheritance really make sense here? I think this might be better as private. And perhaps move ZLibBase_ to an inner namespace to separate it from the public interface.


 const static int NO_COMPRESSION ;
 const static int BEST_SPEED ;
 const static int BEST_COMPRESSION ;
 const static int DEFAULT_COMPRESSION ;

These might be better as an enumeration.


 ZLibCompressor(int compressionLevel = DEFAULT_COMPRESSION);

I don't think we want this to be an implicit conversion - I recommend explicit here to avoid surprises.


bool operator()(const char *data_in, size_t const size_in, char *data_out, size_t &size_out);

const is unnecessary on the size_in argument, which is passed by value. It doesn't contribute to the function signature.

Prefer returning values rather than accepting out-parameter references: instead of returning a bool to indicate failure, prefer to return the size written, and throw if there's a failure (with the error message from the stream); if the size written is still important on failure, then perhaps return a pair or tuple that the the caller can unpack (e.g. with a structured binding).

Consider passing destination first, so the interface is more like std::memcpy() and therefore easier to remember.


class ZLibDeCompressor : private ZLibBase_{

That's a surprising capitalisation of "decompressor" that I'll probably mistype endless times. There's no need for a capital in the middle of that word.


template<bool OP>
struct ZLibOpsBase_;

I'm not a fan of using a bool as behaviour switch here. Prefer a scoped enumerator, which gives a clear name to identify which instantiation is compressor and which is decompressor.


 memset(zs, 0, sizeof(z_stream));

We've misspelt std::memset here. And prefer to match the size with the destination object: sizeof *zs.

In any case, writing all-bytes zero is not the correct way to initialise a z_stream as it contains pointers and function pointers.


 std::bad_alloc exception;
 throw exception;

No need for a variable - we can simply throw std::bad_alloc{}.


 if (data == nullptr || size == 0 || data_out == nullptr || size_out == 0)

I don't think it should be an error to specify zero-length input, at least for compression.


 zs->avail_in = static_cast<uInt>( size );

Zlib's uInt is a type alias for unsigned int. This cast is not safe if size > UINT_MAX, so we should check that and take appropriate action if it's larger (decompressing in chunks, for instance).


 size_t osize = strlen(odata);

Here, we've misspelt std::strlen as well as std::size_t.


 std::cout << std::fixed << std::setprecision(2) << ( float(osize) / float(csize) ) << std::endl;

We don't need to flush output, so use '\n' rather than std::endl.


 std::cout << "Error" << std::endl;

This looks like an error, so we should use std::cerr << "Error\n";.


The design looks over-complicated for something that's just meant to be a combination of opaque implementation and strategy pattern. We can do that with a simple despatch table and much less inheritance:

#ifndef ZLIB_COMPRESSOR_H_
#define ZLIB_COMPRESSOR_H_
#include <cstddef>
#include <memory>
namespace detail
{
 struct impl;
 class ZLibProcessor
 {
 protected:
 std::unique_ptr<detail::impl> p;
 explicit ZLibProcessor(std::unique_ptr<detail::impl>,int);
 ~ZLibProcessor();
 std::size_t operator()(char *dst, std::size_t size,
 const char *src, std::size_t len);
 };
};
class ZLibCompressor : detail::ZLibProcessor
{
public:
 enum class compression_level {
 none,
 fast,
 best,
 normal,
 };
 explicit ZLibCompressor(compression_level level = compression_level::normal);
 explicit ZLibCompressor(int compressionLevel);
 using ZLibProcessor::operator();
};
class ZLibDecompressor : detail::ZLibProcessor
{
public:
 ZLibDecompressor();
 using ZLibProcessor::operator();
};
#endif
#include "zlib.h"
#include <stdexcept>
#include <zlib.h>
struct ZLibOps {
 int(*init)(z_stream*,int);
 int(*done)(z_stream*);
 int(*reset)(z_stream*);
 int(*process)(z_stream*,int);
};
struct detail::impl
{
 ZLibOps const& ops;
 z_stream zs = {
 .zalloc = Z_NULL,
 .zfree = Z_NULL,
 .opaque = nullptr
 };
 void init(int const compressionLevel)
 {
 switch (ops.init(&zs, compressionLevel)) {
 case Z_OK:
 return;
 case Z_MEM_ERROR:
 throw std::bad_alloc{};
 default:
 throw std::runtime_error{zs.msg};
 }
 }
 int done() noexcept
 {
 return ops.done(&zs);
 }
 std::size_t process(char *dest, std::size_t size,
 char const *src, std::size_t len)
 {
 ops.reset(&zs);
 zs.next_out = reinterpret_cast<Bytef*>(dest);
 zs.avail_out = static_cast<uInt>(size);
 zs.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(src));
 zs.avail_in = static_cast<uInt>(len);
 // FIXME: loop until size and len are both less than UINT_MAX
 int const result = ops.process(&zs, Z_FINISH);
 switch (result) {
 case Z_STREAM_END:
 return zs.total_out;
 case Z_MEM_ERROR:
 throw std::bad_alloc{};
 default:
 throw std::runtime_error{zs.msg};
 }
 }
};
// Base processor
detail::ZLibProcessor::ZLibProcessor(std::unique_ptr<detail::impl> imp, int compressionLevel)
 : p{std::move(imp)}
{
 p->init(compressionLevel);
}
detail::ZLibProcessor::~ZLibProcessor()
{
 p->done();
};
std::size_t detail::ZLibProcessor::operator()(char *dest, std::size_t size,
 char const *src, std::size_t len) {
 return p->process(dest, size, src, len);
}
// Compressor
static constexpr ZLibOps deflate_ops = {
 .init = [](z_stream *zs,int level) { return deflateInit(zs, level); },
 .done = deflateEnd,
 .reset = deflateReset,
 .process = deflate
};
static int convert(ZLibCompressor::compression_level level)
{
 switch (level) {
 case ZLibCompressor::compression_level::none: return Z_NO_COMPRESSION;
 case ZLibCompressor::compression_level::fast: return Z_BEST_SPEED;
 case ZLibCompressor::compression_level::best: return Z_BEST_COMPRESSION;
 case ZLibCompressor::compression_level::normal: return Z_DEFAULT_COMPRESSION;
 };
 throw std::logic_error{"Invalid compression level"};
}
ZLibCompressor::ZLibCompressor(ZLibCompressor::compression_level level)
 : ZLibCompressor{convert(level)}
{}
ZLibCompressor::ZLibCompressor(int compressionLevel)
 : ZLibProcessor{std::make_unique<detail::impl>(deflate_ops), compressionLevel}
{}
// Decompressor
static constexpr ZLibOps inflate_ops = {
 .init = [](z_stream *zs,int) { return inflateInit(zs); },
 .done = inflateEnd,
 .reset = inflateReset,
 .process = inflate
};
ZLibDecompressor::ZLibDecompressor()
 : ZLibProcessor{std::make_unique<detail::impl>(inflate_ops), 0}
{}
#include "zlib.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
constexpr std::size_t SIZE = 1024;
int main()
{
 const char *odata = "Hello World";
 const std::size_t osize = std::strlen(odata);
 char cbuffer[SIZE];
 const std::size_t csize = SIZE;
 char dbuffer[SIZE];
 const std::size_t dsize = SIZE;
 try {
 auto const cr = ZLibCompressor{}(cbuffer, csize, odata, osize);
 auto const dr = ZLibDecompressor{}(dbuffer, dsize, cbuffer, csize);
 // should recompress to same content
 assert(dr == osize);
 assert(!std::strncmp(odata, dbuffer, osize));
 std::cout << std::fixed << std::setprecision(2) << (float(cr) / float(dr)) << '\n';
 } catch (std::exception& e) {
 std::cerr << e.what() << '\n';
 return EXIT_FAILURE;
 }
}
answered Oct 17, 2024 at 16:50
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.