// Usage demo: one format string exercising each specifier form.
// The trailing comment on every line shows the expected output of that line.
cout << format(
"%? %?!\n" // Hello world!
"%1% %3% :)\n" // Hello again :)
"%%05X: %05X\n" // %05X: 01234
"%%g: %g\n" // %g: 3.14159
"%%.3f: %5$.3f\n" // %.3f: 3.142
, "Hello", "world", "again"
, 0x1234, 3.14159265f);
- The first line shows universal formatting: `%?` will just take the next argument and pass it to `ostream << it` (`"Hello"` and `"world"`).
- The next line shows the positional specifier `%1% %3%` (alternatively `%1$? %3$s`), which explicitly selects the first and third arguments (`"Hello"` and `"again"`).
- `%%` outputs a single `%`, `0` changes to zero-padding, `5` sets the width and `X` will do `out << hex << uppercase << it`.
- `%g` is `out << defaultfloat << it` and probably still needs some work.
- `%5$.3f` uses positional and precision specifiers.
How it works
`format` is a variadic function that creates a `format_pack`, which is derived from my data pack:
/// Marker base identifying data packs that carry a format string
struct format_pack_tag {};
/// Data pack whose leading element is the format string
template<class String = string, class... Elements>
class format_pack
  : public format_pack_tag
  , public pack<String, Elements...> {
    /// The underlying data pack (format string followed by the arguments)
    using format_pack_base_ = pack<String, Elements...>;
public:
    /// Hand every constructor argument over to the underlying pack
    template<class... Args>
    format_pack(Args&&... args)
      : format_pack_base_(forward<Args>(args)...) {}
};
/// Build a format_pack from a format string and its arguments.
/// Every element type is stored with references and cv-qualifiers stripped.
template<class String, class... Elements>
inline format_pack<remove_cvref_t<String>,
    remove_cvref_t<Elements>...>
format(String&& fmt, Elements&&... e) {
    using result_type = format_pack<remove_cvref_t<String>,
        remove_cvref_t<Elements>...>;
    return result_type(forward<String>(fmt), forward<Elements>(e)...);
}
//=======================================================================
/// Stream-out a pack that holds nothing but the string (no arguments)
template<class String>
inline ostream& operator << (ostream& out,
    const format_pack<String>& it) {
    out << it.value;
    return out;
}
/// Stream-out the packed arguments according to the packed format string
template<class String, class... Elements>
inline ostream& operator << (ostream& out,
    const format_pack<String, Elements...>& it) {
    const char* const fmt = c_str(it.value);
    // the engine does the whole job inside its constructor
    formatting::formatter(out, fmt, it.next);
    return out;
}
There are some helpers used:
- `remove_cvref_t` = `remove_cv_t<remove_reference_t<T>>` (C++14 style).
- `c_str` will either return its `const char *` argument or call `c_str()` on a `std::string`.
The Formatter
/// Formatting engine (short-lived, designed to do the job and dismiss)
///
/// Both constructors perform the complete output; the object itself is only
/// the state shared between advance(), prepare() and output().
struct formatter {
    const char *fmt;  ///< current position in the format string, nullptr once done or failed
    ostream& out;     ///< destination stream
    state sta;        ///< argument position and flags of the current specifier
    ///\copydoc formatting::advance()
    const char * advance() {
        return fmt = formatting::advance(fmt, out);
    }
    ///\copydoc formatting::prepare()
    const char * prepare() {
        const char * pos = formatting::prepare(fmt, out, sta);
        if (!pos) {
            // fmt points just after the '%', so fmt-1 echoes the rest of the
            // format string starting with that '%'
            out.clear();
            out << (fmt-1);
            out.setstate(ios::failbit);
        }
        return fmt = pos;
    }
    ///\copydoc formatting::output()
    template<class T>
    const char * output(const T& data) {
        const char * pos = formatting::output(fmt, out, data, sta.flags);
        if (!pos) {
            // echo the unprocessed rest of the format string, then re-flag the error
            out.clear();
            out << fmt;
            out.setstate(ios::failbit);
        }
        return fmt = pos;
    }
    ///\copydoc output()
    template<class T>
    void operator() (const T& data) {
        output(data);
    }
    /// Stream-out the format string (no data)
    formatter(ostream& out, const char * fmt)
      : fmt(nullptr), out(out) {
        // `fmt` is the constructor parameter here; the member stays nullptr
        out << fmt;
    }
    /// Stream-out formatted data
    ///
    /// Repeats advance() / prepare() / data.exec() until the format string is
    /// exhausted or an error occurs. If data.exec() reports failure, the rest
    /// of the format string is echoed and failbit is set on \c out.
    template<class... Elements>
    formatter(ostream& out, const char * fmt
    , const pack<Elements...>& data)
      : fmt(fmt), out(out) {
        sta.apos = 0;
        for(;;) {
            if (!advance()) return;
            if (!prepare()) return;
            if (!data.exec(sta.apos, *this)) break;
            // output() leaves the member null after a failed conversion;
            // stop here so advance() is never handed a null pointer
            if (!this->fmt) return;
            ++sta.apos;
        }
        // The parameter `fmt` shadows the member inside this body, so the
        // member must be named explicitly to echo the *remaining* format.
        // Write first and flag afterwards: a stream with failbit set
        // silently drops formatted output.
        out << this->fmt;
        out.setstate(ios::failbit);
        this->fmt = nullptr;
    }
};
The Formatting (the hidden logic)
- `advance()` will find the `%` (skipping any `%%`).
- `prepare()` will parse positional, width and precision specifiers.
- `output()` will output the value and is overloaded with some SFINAE (arithmetic, pointers, strings and all other).
namespace formatting {
///\brief Advance to next format specifier (\c '%')
///
/// Literal text in front of the specifier is written to \c out,
/// with every \c "%%" pair reduced to a single \c '%'.
///\return position just after the \c '%' that starts the specifier,
///        or nullptr when the end of the string was reached
inline const char* advance
( const char* pos ///< position in format string
, ostream& out ///< output stream
) {
    const char* cur = pos;
    while (true) {
        // walk to the next '%'; reaching the terminator ends the job
        while (*cur != '%') {
            if (*cur == '\0') {
                out.write(pos, cur - pos);
                return nullptr;
            }
            ++cur;
        }
        ++cur; // look at the character following the '%'
        if (*cur != '%') {
            // real specifier: flush the text in front of its '%'
            out.write(pos, cur - 1 - pos);
            return cur;
        }
        // "%%": flush the text including the first '%', skip the second
        out.write(pos, cur - pos);
        ++cur;
        pos = cur;
    }
}
/// State of the specifier currently being processed
struct state {
    /// Bits that can be set in \c flags
    enum {
        space  = 1, ///< ' ' specifier (sign)
        center = 2  ///< '=' specifier (alignment)
    };
    uint apos;  ///< argument position
    uint flags; ///< additional flags
};
//TODO: use flags in output()
//=======================================================================
///\brief Prepare output stream state
///
/// Resets \c out to right-aligned decimal output with fill ' ', width 0 and
/// precision 6, clears \c sta.flags, then consumes flags, an optional
/// position (\c N$ or \c N%), width and precision from \c spec.
/// For the \c N% form the returned position is the closing \c '%'.
///\return position of conversion specifier (after \c %w.p)
/// or `nullptr` on error + failbit on `out` (can throw).
///\note The error path is not implemented yet (see the TODO at the
///      precision case); every current return path yields a valid position.
inline const char *prepare
( const char* spec ///< position in format string after \c '%'
, ostream& out ///< output stream
, state& sta ///< formatter state
) {
    // every specifier starts from the same defaults
    out.flags(ios::right|ios::dec);
    out.fill(' ');
    out.width(0);
    out.precision(6);
    sta.flags = 0;
    for(;;) {
        char c = *spec++;
        switch (c) {
        // unknown character -> pass to output()
        default:
            return --spec;
        // these are ignored (argument types for printf)
        case 'h': case 'l': case 'j': case 'z': case 't': case 'L':
            continue;
        // adjustfield
        case '-':
            out.setf(ios::left, ios::adjustfield);
            continue;
        case '=':
            sta.flags |= state::center;
            continue;
        // fill
        case '0':
            out.fill('0');
            continue;
        // sign
        case '+':
            out.setf(ios::showpos);
            continue;
        case ' ':
            sta.flags |= state::space;
            continue;
        // width or position
        case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8':
        case '9':
          { const char* num = spec-1;
            // <cctype> functions require a value representable as
            // unsigned char; a plain (possibly negative) char is undefined
            while (isdigit(static_cast<unsigned char>(*spec))) ++spec;
            int w = atoi(num);
            if (*spec == '%' || *spec == '$') {
                // positional specifier: positions are 1-based in the format
                sta.apos = (uint)(w-1);
                if (*spec == '%') {
                    return spec;
                }
                ++spec;
                continue;
            }
            out.width(w);
          }
            continue;
        // precision
        case '.':
            //TODO: use some flag for this check
            // if out.precision() != 6
            // // second precision specifier
            // out.setstate(ios::failbit)
            // return nullptr
          { const char* num = spec;
            while (isdigit(static_cast<unsigned char>(*spec))) ++spec;
            // no digits after '.' makes atoi() yield 0
            out.precision(atoi(num));
          }
            continue;
        }
    }
}
//################################################################ output
#ifdef FIRDA_DOXYGEN_INVOKED_
///\brief Format the value.
///
/// Documentation-only declaration (seen when FIRDA_DOXYGEN_INVOKED_ is
/// defined); the overloads actually compiled are in the \c #else branch.
///\return position after format specification
/// or `nullptr` on error + failbit on `out` (can throw).
template<class T> const char* output
( const char* spec ///< position in format string after '%'
, ostream& out ///< output stream
, const T& it ///< value to be formatted
, uint flags = 0 ///< formatter flags
);
#else
/// Format arithmetic value
///
/// Consumes one conversion character from \c spec. An upper-case character
/// is folded to lower case and switches \c ios::uppercase on first.
///  - \c ? \c %  : stream the value as it is
///  - \c d \c i  : decimal, converted through try_make_signed_t<T>
///  - \c u       : decimal, converted through try_make_unsigned_t<T>
///  - \c x \c o  : hexadecimal / octal
///  - \c g \c f \c e : defaultfloat / fixed / scientific
///  - \c c \c s  : the value converted to \c char
///\return position after the conversion character, or \c nullptr with
///        failbit set on \c out when the character is not recognised
template<class T> inline
enable_if_t<is_arithmetic<T>::value,
    const char*>
output
( const char* spec ///< position in format string after '%'
, ostream& out ///< output stream
, T it ///< value to be formatted
, uint flags = 0 ///< formatter flags (currently unused here)
) {
    char c = *spec++;
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour
    const unsigned char uc = static_cast<unsigned char>(c);
    if (isupper(uc)) {
        c = static_cast<char>(tolower(uc));
        out.setf(ios::uppercase);
    }
    switch (c) {
    default:
        out.setstate(ios::failbit);
        return nullptr;
    case '?': case '%':
        out << it;
        break;
    case 'd': case 'i':
        out << dec << try_make_signed_t<T>(it);
        break;
    case 'u':
        out << dec << try_make_unsigned_t<T>(it);
        break;
    case 'x':
        out << hex << it;
        break;
    case 'o':
        out << oct << it;
        break;
    case 'g':
        out << defaultfloat << it;
        break;
    case 'f':
        out << fixed << it;
        break;
    case 'e':
        out << scientific << it;
        break;
//FIXME: consider signed vs. unsigned
    case 'c': case 's':
        out << char(it);
        break;
    }
//TODO: case '#' - special formatting
    return spec;
}
//=======================================================================
/// Format pointer
///
/// \c p prints the address in hexadecimal. For any other conversion
/// character a null pointer prints \c "null", otherwise the pointee is
/// formatted by the matching output() overload.
///\return position after the conversion character, or \c nullptr with
///        failbit set on \c out on error
template<class T> inline
enable_if_t<!is_same<T,char>::value,
    const char*>
output
( const char* spec ///< position in format string after '%'
, ostream& out ///< output stream
, const T* it ///< value to be formatted
, uint flags = 0 ///< formatter flags
) {
    // remember where the conversion character is: the pointee overload has
    // to parse that same character, not the one following it
    const char* const start = spec;
    char c = *spec++;
    if (!c) {
        // format string ended right after '%': report the error instead of
        // returning a position behind the terminator
        out.setstate(ios::failbit);
        return nullptr;
    }
    // <cctype> functions require a value representable as unsigned char
    const unsigned char uc = static_cast<unsigned char>(c);
    if (isupper(uc)) {
        c = static_cast<char>(tolower(uc));
        out.setf(ios::uppercase);
    }
    switch (c) {
    case 'p':
        out << hex << uintptr_t(it);
        return spec;
    default:
        if (!it) {
            //TODO: skip custom format
            out << "null";
            return spec;
        }
        // delegate with the original position and the caller's flags
        return output(start, out, *it, flags);
    }
}
//=======================================================================
/// Format string
///
/// Streams the string as it is; the conversion character is skipped without
/// being interpreted.
///\return position after the conversion character, or \c nullptr with
///        failbit set on \c out when the format string ends right after '%'
template<class String> inline
enable_if_t<is_same<String,const char *>::value
    || is_same<String,const string&>::value,
    const char*>
output
( const char* spec ///< position in format string after '%'
, ostream& out ///< output stream
, String it ///< string to be formatted
, uint flags = 0 ///< formatter flags (currently unused here)
) {
    if (!*spec) {
        // nothing to skip: stepping over the terminator would hand the
        // caller a position outside the format string
        out.setstate(ios::failbit);
        return nullptr;
    }
    out << it;
    return spec + 1;
}
//=======================================================================
/// Format generic
///
/// Handles every type that is neither fundamental nor a pointer (after
/// decay). Consumes one conversion character from \c spec; an upper-case
/// character is folded to lower case and switches \c ios::uppercase on.
/// Only \c s, \c ? and \c % are accepted and all stream the value as it is.
///\return position after the conversion character, or \c nullptr with
///        failbit set on \c out when the character is not recognised
template<class T> inline
enable_if_t<!is_fundamental<decay_t<T>>::value
    && !is_pointer<decay_t<T>>::value,
    const char*>
output
( const char* spec ///< position in format string after '%'
, ostream& out ///< output stream
, const T& it ///< value to be formatted
, uint flags = 0 ///< formatter flags (currently unused here)
) {
    char c = *spec++;
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour
    const unsigned char uc = static_cast<unsigned char>(c);
    if (isupper(uc)) {
        c = static_cast<char>(tolower(uc));
        out.setf(ios::uppercase);
    }
    switch (c) {
//TODO: some conversions for cdiuxgfe (char, long long, long double)
    default:
        out.setstate(ios::failbit);
        return nullptr;
    case 's': case '?': case '%':
        out << it;
        break;
    }
    return spec;
}
#endif
Finally - delegated formatting
All I wanted was to create some engine for remote debug/trace log, to store all the data (with some code/ID) in some queue without actually formatting it and let another (debug) thread do the formatting (and output to cout or syslog). Just an example how it can be done:
/// Type-erased record: the only operation is printing itself to a stream
struct record_base
/// Print the stored data to \c ostream (implemented by record_impl)
virtual ostream& print(ostream&) = 0
/// Virtual destructor so records can be destroyed through record_base
virtual ~record_base() {}
/// Record holding a concrete pack; formatting is deferred until print()
template<class Pack>
struct record_impl
: record_base
Pack pack
/// Forward all arguments to the constructor of the stored pack
template<class... Args>
record_impl(Args&&... args)
: pack(forward<Args>(args)...) {}
/// Stream the stored pack to \c out
ostream& print(ostream& out) override
return out << pack
/// Capture the format string and the arguments in a heap-allocated record.
/// Nothing is formatted here; that happens later in record_base::print().
/// The format string is stored as \c string, every argument with references
/// and cv-qualifiers stripped (remove_cvref_t, the same helper format() uses).
template<class String, class... Args>
unique_ptr<record_base>
record(String&& fmt, Args&&... args)
return unique_ptr<record_base>(new record_impl<
format_pack<string, remove_cvref_t<Args>...> >
( forward<String>(fmt), forward<Args>(args)...) )
// Demo of delegated formatting: the arguments are packed by record() first
// and only formatted when print() is called on the returned record.
int main()
record(
"%? %?!\n" // Hello world!
"%1% %3% :)\n" // Hello again :)
"%%05X: %05X\n" // %05X: 01234
"%%g: %g\n" // %g: 3.14159
"%%.3f: %5$.3f\n" // %.3f: 3.142
, "Hello", "world", "again"
, 0x1234, 3.14159265f
).get()->print(cout)
1 Answer 1
Pointers and order of formatting::output overloads
The output for pointers should be last or forwarding has to be used, but there is a problem with pointers to unformattable objects - it won't compile even when we use simple %p formatting. Dropped (no more redirection, pointers shall be formatted as integral types, where %p is same as %x).
The very packing of arguments
Using that data pack together with remove_cvref_t seems not to be the best choice. It would be better to use similar structure without that #pragma pack(1) and following type selection:
/// Type selection for format pack
///
/// Pointers, and fundamental types no larger than a pointer, are stored by
/// value (\c X); everything else is stored as a reference (\c X&).
// NOTE(review): elements stored as X& presumably require the referenced
// object to outlive the pack - confirm for temporaries passed to format().
template<typename T> struct format_element_type
private:
using X = remove_reference_t<T>
static constexpr bool simple
= is_pointer<X>::value
|| (is_fundamental<X>::value
&& sizeof(X) <= sizeof(void*))
public:
using type = conditional_t<simple, X, X&>
/// Shorthand for the element type selected by format_element_type
template<typename T> using format_t
= typename format_element_type<T>::type
/// Create data pack with format string
///
/// Same call interface as the earlier format(), but the element types are
/// chosen by format_t instead of remove_cvref_t.
template<class String, class... Elements>
inline format_pack<format_t<String>
, format_t<Elements>...>
format(String&& fmt, Elements&&... e)
return format_pack<format_t<String>
, format_t<Elements>...>
( forward<String>(fmt)
, forward<Elements>(e)...)
Benchmarking and possible format literals
As already suggested, packed references (like with tie creating tuple of references) are better than copying the whole object (which is sometimes not even possible), but some fundamental types and pointers could be passed directly (but beware of arrays - decay_t is no good either, we have to preserve the information about the number of elements). It should produce a type-safe alternative to va_list (which is the reason for that sizeof(X) <= sizeof(void*) - fast indexing / positional specifier handling is to be examined and benchmarked).
Some compile-time parsing of format string (e.g. operator "" _fmt(const char *str, size_t len)) could help with selecting faster implementation (especially if the string contains no positional specifiers). Put all together, we could produce type-safe alternative to snprintf this way, that could possibly be even faster (because of compile-time type resolution and implementation selection).
As this is getting too conceptual for CR, I will consider moving it to programmers.
Comparison to boost::format
There still seem to be some advantages over boost::format in the fact that all the arguments are available immediately by the method selected - function call. Other than that, boost::format can be adapted to produce almost the same results (there is no %? universal formatting, positional %1% has to be used with boost AFAIK).
Simple experiments:
(frefs refers to the format creation included in the code below, which uses the tuple-tie approach)
/// "frefs" variant of format(): the pack is instantiated with the deduced
/// template arguments unchanged (no remove_cvref_t / format_t applied).
template<class String, class... Elements>
inline firda::format_pack<String, Elements...>
format(String&& fmt, Elements&&... e)
return firda::format_pack<String, Elements...>(
std::forward<String>(fmt), std::forward<Elements>(e)...)
/// End of recursion: used when no argument needs the other overload
template<class... Args> void apply(boost::format& bf, Args&&... args)
return
/// Feed the first argument to \c bf with operator%, then recurse on the rest
template<class T, class... Args> void apply
( boost::format& bf, T&& arg, Args&&... args )
bf % arg
apply(bf, args...)
/// Print the same format string and arguments through firda::format, the
/// local reference-based format() and boost::format, one labelled line each.
template<class... Args> void test(const char * fmt, Args&&... args)
std::cout << "firda: " << firda::format(fmt, args...) << std::endl
std::cout << "frefs: " << format(fmt, args...) << std::endl
boost::format bf(fmt)
apply(bf, args...)
std::cout << "boost: " << bf << std::endl
/// Minimal user-defined type whose only formatting support is operator<<
struct simple
std::string text
/// Stream-out the wrapped text
friend std::ostream& operator << (std::ostream& out, const simple& it)
return out << it.text
// Run the three-way comparison for a string, a floating-point value and a
// user-defined type.
int main()
test("hello %s!", "world")
test("pi = %g", 3.14159265)
test("simple: %1%", simple{"text"})
The Syntax
Python/Ruby programmers should find my altered syntax a bit familiar, although it is not and never will be Python/Ruby, it is still C++ with altered syntax, nothing more, nothing less.
Some more code for utnapistim (the remote trace log)
(The rqueue does not need to allocate individual records. It rather asks through record_size and record_type how much space is needed and what type is to be used for construction of the record. There is no allocation and/or moving at all with fixed_size<> option - everything is embedded within the buffer.)
///\brief Remote Trace Log \file
#include "rqueue.hpp" "thread.hpp" "format.hpp"
namespace firda
///\addtogroup rqueue
///\{
/// Trace record designed to pack all data for delayed formatting
///
/// The base class holds only the header (time, centi, size, code); its
/// output(), length() and flatten() do nothing. The nested \c data templates
/// add the payload, the nested \c flat class is the byte-wise form.
class trace_record
template<class...> friend class rqueue
/// Enables polymorphic destruction (for rqueue only)
virtual ~trace_record() {}
public:
/// 1.1.2000 0:00
// NOTE(review): if system_clock counts from the Unix epoch, 946681200 s is
// 1999-12-31 23:00:00 UTC, i.e. midnight in UTC+1 - confirm the intended zone.
static constexpr time_point<system_clock> epoch
= time_point<system_clock>(seconds(946681200))
const uint time ///< seconds since 1.1.2000 0:00
const byte centi ///< 1/100 second fraction
const byte size ///< size of record payload in bytes
const word code ///< record type identifier
/// Print payload to `ostream` (record header not formatted)
virtual ostream& output(ostream& out) const
return out
/// Compute size of payload flat representation
virtual size_t length() const
return 0
/// Flatten payload to buffer
virtual size_t flatten(void* dst, size_t max) const
return 0
/// Flat version of the record (can be constructed from `trace_record`)
///
/// The payload bytes are expected directly behind the header object:
/// data() returns the address of `this + 1`.
class flat
template<class...> friend class rqueue
public:
const uint time ///< seconds since 1.1.2000 0:00
const byte centi ///< 1/100 second fraction
const byte size ///< size of record payload in bytes
const word code ///< record type identifier
/// Start of the payload (the first byte after this header)
const void *data() const
return (const void *)(this + 1)
/// Payload viewed as bytes
const byte *bytes() const
return (const byte *)data()
/// i-th payload byte (no bounds check)
const byte& operator[] (size_t i) const
return *(bytes() + i)
private:
/// Copy the header of \c src and flatten its payload behind this object;
/// \c sz is the total size (header + payload) reserved for the record
flat(size_t sz, const trace_record& src)
: time(src.time), centi(src.centi)
, size(sz - sizeof(flat)), code(src.code)
assert(size == src.length())
src.flatten((void*)(this+1), size)
/// Space needed to store \c src in flat form
static size_t record_size(const trace_record& src)
return sizeof(flat) + src.length()
/// Space occupied by this flat record (header + payload)
size_t record_size() const
return sizeof(flat) + size
/// Relocate the record by copying its bytes to \c dst
void record_move(void *dst)
memcpy(dst, this, record_size())
/// Type to construct for any argument list is \c flat itself
template<class...> using
record_type = flat
private:
#ifndef FIRDA_DOXYGEN_INVOKED_
// forward declaration of the payload-carrying record
forward template<class... Elements>
class data
// Maps constructor arguments to the record type to be constructed:
// code only -> trace_record, code + format + args -> data<args...>
template<class... Args>
struct record_type_
template<class Code>
struct record_type_<Code>
typedef trace_record type
template<class Code, class Format, class... Args>
struct record_type_<Code, Format, Args...>
typedef data<remove_cvref_t<Args>...> type
#endif
template<class...> friend class data
friend ostream& operator << (ostream&, const trace_record&)
/// Construct the header from a millisecond count: \c time gets the whole
/// seconds, \c centi the hundredths of the current second
trace_record(uint64_t ms, byte size, word code)
: time(ms / 1000), centi(ms / 10 % 100)
, size(size), code(code) {}
/// Construct the header stamped with the current time relative to \c epoch
trace_record(byte size, word code)
: trace_record(duration_cast<milliseconds>
( system_clock::now() - epoch ).count()
, size, code) {}
/// Space occupied by this record (header + payload)
size_t record_size() const
return sizeof(trace_record) + size
/// Relocate the record by move-constructing it at \c dst
virtual void record_move(void *dst)
new(dst) trace_record(move(*this))
/// Format pack used for the payload: format string kept as const char *
template<class... Args> using fpack
= format_pack<const char *, remove_cvref_t<Args>...>
/// Record type to construct for the given constructor arguments
template<class... Args> using
record_type = typename record_type_<Args...>::type
/// Space needed for a record constructed from (code, format, args...)
template<class... Args> static size_t
record_size(word, const char *, Args&&...)
return sizeof(record_type<word, const char *, Args...>)
//=======================================================================
/// Trace record implementation (for rqueue only)
///
/// Primary template: header plus a format pack. length() and flatten() are
/// not overridden here, so the base versions (returning 0) apply.
template<class... Elements>
class trace_record::data
: trace_record, fpack<Elements...>
typedef fpack<Elements...> fpack
template<class...> friend class firda::rqueue
/// \c sz is the total record size; the header stores the payload size
/// (sz - sizeof(trace_record)) converted to \c byte
template<class... Args>
data(size_t sz, word code, const char *fmt, Args&&... args)
: trace_record((byte)(sz-sizeof(trace_record)), code)
, fpack(fmt, forward<Args>(args)...)
assert(size == sizeof(data)-sizeof(trace_record))
/// Relocate the record by move-constructing it at \c dst
virtual void record_move(void *dst) override
new(dst) data(move(*this))
public:
/// Format the payload by streaming the format pack
virtual ostream& output(ostream& out) const override
return out << (const fpack&)*this
//=======================================================================
#ifndef FIRDA_DOXYGEN_INVOKED_
/// Specialization for at least one argument: same as the primary template,
/// plus length() and flatten() delegated to the packed arguments.
template<class First, class... Next>
class trace_record::data<First, Next...>
: trace_record, fpack<First, Next...>
typedef fpack<First, Next...> fpack
template<class...> friend class firda::rqueue
/// \c sz is the total record size; the header stores the payload size
/// (sz - sizeof(trace_record)) converted to \c byte
template<class... Args>
data(size_t sz, word code, const char *fmt, Args&&... args)
: trace_record((byte)(sz-sizeof(trace_record)), code)
, fpack(fmt, forward<Args>(args)...)
assert(size == sizeof(data)-sizeof(trace_record))
/// Relocate the record by move-constructing it at \c dst
virtual void record_move(void *dst) override
new(dst) data(move(*this))
public:
/// Format the payload by streaming the format pack
virtual ostream& output(ostream& out) const override
return out << (const fpack&)*this
/// Flat size of the arguments (the part of the pack after the format string)
virtual size_t length() const
return fpack::next.length()
/// Flatten the arguments (the part of the pack after the format string)
virtual size_t flatten(void* dst, size_t max) const
return fpack::next.flatten(dst, max)
#endif
//=======================================================================
/// Print a trace record: the code as "%04x"; when the record has a payload
/// (size != 0) the code is followed by ": " and the formatted payload.
inline ostream& operator << (ostream& out,
const trace_record& rec)
if !rec.size; return out << format("%04x", rec.code)
out << format("%04x: ", rec.code)
return rec.output(out)
/// Dump a flat record as hexadecimal, two zero-padded digits per value:
/// first the payload size, then every payload byte.
inline ostream& operator << (ostream& out,
const trace_record::flat& rec)
out.setf(ios::right|ios::hex, ios::adjustfield|ios::basefield)
// setw() applies to the next insertion only, so it is repeated per value
out.fill('0'); out << setw(2) << (uint)rec.size
for uint i = 0; i < rec.size; ++i
out << setw(2) << (uint)rec[i]
return out
///\}
`prepare()`. Knows what `ostream` knows). 5. It does not use temporary string, the output is direct. 6. I already had that data pack class and rqueue (because of PLCs), it was just natural to write this `format()`.

`advance` doesn't parse embedded strings diligently: in `"\"%\""` it will pick up a percent sign as a formatting one.

`printf`? I don't think so, you have to write double: `"\"%%\""`, it is not designed to understand whatever logic, you may wish to format something within the string: `"\"%s\""`

`sprintf()` is not type safe. You should never use it in C++ code. This code should be type safe (not read the details). But `monstrosity` is a very loaded word. If you look at `std::vector<>` under the covers it's a `monstrosity`, but the interface it provides to the user should make using it clean and reduce errors.