Compile time checking of printf-like format strings
Inspired by this open ticket on Boost, this code seeks to complete the work started there.
Given a printf-style format string and associated arguments, a static_assert
is performed on whether the format string and arguments are valid.
I'm particularly interested in:
- Have I covered all possible format strings?
- Am I doing this in the most efficient way?
Code:
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
#include <type_traits>

#include <boost/format.hpp>
#include <boost/utility/string_ref.hpp>
/// Reports whether the conversion character `c` occurs in `fmt` at index `n` or later.
///
/// `fmt` is the list of conversion characters accepted for one argument type
/// (e.g. "d*"), normally a string literal, so its last element is '\0'.
///
/// '\0' is never accepted: it is what a caller reads from the end of a truncated
/// format string such as "%", and matching it against the literal's own
/// terminator would wrongly report that truncated specification as valid.
///
/// Written as one recursive return expression so that it remains a valid
/// C++11 constexpr function.
///
/// @param fmt  accepted conversion characters
/// @param n    index at which to start searching
/// @param c    conversion character to look for
/// @return     true if `c` is a non-NUL character found in fmt[n..N)
template<std::size_t N>
constexpr bool checkValidFormats(const char (&fmt)[N], std::size_t n, char c)
{
    return n < N
        && c != '\0'
        && (fmt[n] == c || checkValidFormats(fmt, n + 1, c));
}
// Primary template, intentionally left undefined: only the explicit
// specializations generated by SUPPORTED_TYPE provide supports(), so using an
// argument type without a specialization is a compile error.
template<class>
struct FormatSupportedType;
// Generates the explicit specialization FormatSupportedType<T>.
//
// T    - argument type being registered
// Fmts - string literal listing every conversion character accepted for T
//
// The generated supports(c) yields true when `c` is listed in Fmts. Otherwise
// it evaluates a throw-expression, which is not a constant expression, so a
// static_assert that depends on the call fails to compile instead of merely
// seeing `false`.
#define SUPPORTED_TYPE(T, Fmts) \
    template<> \
    struct FormatSupportedType<T> \
    { \
        constexpr static bool supports(char c) \
        { \
            return !checkValidFormats(Fmts, 0, c) \
                ? throw std::logic_error("invalid fmt for type") \
                : true; \
        } \
    }
// Table of argument types and the conversion characters each one accepts.
// int and unsigned additionally list '*'.
// NOTE(review): presumably '*' is listed so an integer can be matched as a
// run-time width/precision argument - confirm against the callers.
// NOTE(review): only the types below are registered; any other argument type
// (e.g. long, long long) has no FormatSupportedType specialization.
SUPPORTED_TYPE(char, "c");
SUPPORTED_TYPE(int, "d*");
SUPPORTED_TYPE(unsigned, "u*");
SUPPORTED_TYPE(char*, "s");
SUPPORTED_TYPE(const char*, "s");
SUPPORTED_TYPE(std::string, "s");
SUPPORTED_TYPE(boost::string_ref, "s");
SUPPORTED_TYPE(double, "f");
SUPPORTED_TYPE(float, "f");
/////////////////
/// Returns true when `c` is one of the decimal digit characters '0'..'9'.
constexpr bool isDigit(char c)
{
    return !(c < '0' || c > '9');
}
/// Returns true when `c` may appear between '%' and the conversion character:
/// a digit, a length letter, a flag-like character, or the '.' that introduces
/// a precision. '*' is deliberately absent; the caller handles it separately.
constexpr bool isModifier(char c)
{
    return isDigit(c)
        // length letters
        || c == 'h' || c == 'l' || c == 'j' || c == 'z' || c == 't' || c == 'L'
        // flag-like characters
        // NOTE(review): 'I' and '=' look like glibc / Boost.Format extensions - confirm
        || c == '#' || c == '+' || c == '-' || c == ' ' || c == '\'' || c == 'I' || c == '='
        // precision separator
        || c == '.';
}
/// Returns the index of the first character at or after `n` for which
/// isModifier() is false.
///
/// The return type is spelled std::size_t, matching the parameters; the
/// unqualified global `size_t` is not guaranteed to be declared by the C++
/// headers this file includes.
///
/// @param fmt  format string being scanned
/// @param n    index at which to start scanning
/// @return     index of the first non-modifier character
/// @throws std::logic_error ("invalid format string") if `n` reaches N; inside
///         a constant expression this makes the evaluation ill-formed
template<std::size_t N>
constexpr std::size_t nextNonModifier(const char (&fmt)[N], std::size_t n)
{
    return n >= N ? throw std::logic_error("invalid format string")
         : isModifier(fmt[n]) ? nextNonModifier(fmt, n + 1)
         : n;
}
////////////////////
// Forward declarations: the width/precision helpers defined next call back into
// checkFormatHelper, which is itself defined after them.

// Overload used once every argument has been consumed.
template<std::size_t N>
constexpr bool checkFormatHelper(const char (&fmt)[N], std::size_t n);
// Overload used while at least one argument (`arg`) is still unconsumed.
template<std::size_t N, class T, class... Ts>
constexpr bool checkFormatHelper(const char (&fmt)[N], std::size_t n, const T& arg, const Ts&... args);
////////////////////
template<std::size_t N, typename T1, typename T2, typename T3, typename... Ts>
constexpr auto checkWidthAndPrecision(const char (&fmt)[N], std::size_t n, const T1& /*width*/, const T2& /*precision*/, const T3& /* arg */, const Ts&... args)
-> typename std::enable_if<
std::is_integral<T1>::value &&
std::is_integral<T2>::value,
bool>::type
{
return FormatSupportedType< typename std::decay<T3>::type>::supports(fmt[n]) &&
checkFormatHelper(fmt, n + 1, args...);
}
/// Catch-all overload: accepts any argument list and reports the format as
/// invalid. It is the only candidate left when the constrained overload does
/// not apply (non-integral width/precision, or too few arguments).
template<std::size_t Len, typename... Args>
constexpr bool checkWidthAndPrecision(const char (&)[Len], std::size_t, const Args&...)
{
    return false;
}
////////////////////
template<std::size_t N, typename T1, typename T2, typename... Ts>
constexpr auto checkWidthOrPrecision(const char (&fmt)[N], std::size_t n, const T1& /*precision*/, const T2& /* arg */, const Ts&... args)
-> typename std::enable_if<
std::is_integral<T1>::value,
bool>::type
{
return FormatSupportedType< typename std::decay<T2>::type>::supports(fmt[n]) &&
checkFormatHelper(fmt, n + 1, args...);
}
/// Catch-all overload: accepts any argument list and reports the format as
/// invalid. It is the only candidate left when the constrained overload does
/// not apply (non-integral width/precision, or too few arguments).
template<std::size_t Len, typename... Args>
constexpr bool checkWidthOrPrecision(const char (&)[Len], std::size_t, const Args&...)
{
    return false;
}
////////////////////
/// Scans the rest of `fmt` once every argument has been consumed.
///
/// The remainder is valid when it contains no further conversion
/// specification: ordinary characters and the literal "%%" are skipped, and any
/// other '%' makes the result false because no argument is left for it.
///
/// @param fmt  format string
/// @param n    index at which to resume scanning
/// @return     true if scanning reaches the end (n >= N) without meeting a
///             conversion specification
template<std::size_t N>
constexpr bool checkFormatHelper(const char (&fmt)[N], std::size_t n)
{
    return n >= N
        || (fmt[n] == '%'
                ? (fmt[n + 1] == '%' && checkFormatHelper(fmt, n + 2))
                : checkFormatHelper(fmt, n + 1));
}
// Scans `fmt` from index `n` while at least one argument (`arg`, of type T) is
// still unconsumed.
//
// Ordinary characters and the literal "%%" are skipped without consuming an
// argument. At any other '%' the conversion character is located and checked
// against std::decay<T>::type through FormatSupportedType<...>::supports(),
// after which scanning continues with the remaining arguments `args...`.
//
// Throws std::logic_error if the end of the format string is reached while
// arguments remain; supports() throws on a type/character mismatch. Inside a
// constant expression either throw makes the evaluation ill-formed.
//
// The branches below are tested in order, so the more specific patterns
// ("ll", "*.*", ".*", "*") must stay ahead of the generic modifier branch.
template<std::size_t N, class T, class... Ts>
constexpr bool checkFormatHelper(const char (&fmt)[N], std::size_t n, const T& arg, const Ts&... args)
{
return
n >= N ?
throw std::logic_error("too many arguments for provided format string")
: fmt[n] != '%' ?
checkFormatHelper(fmt, n + 1, arg, args...)
// literal percent character: "%%" consumes no argument
: (fmt[n + 1] == '%') ?
checkFormatHelper(fmt, n + 2, arg, args...)
// long-long modifier: "%ll" followed directly by the conversion character at n + 3
: (fmt[n + 1] == 'l' && fmt[n + 2] == 'l') ?
FormatSupportedType< typename std::decay<T>::type >::supports(fmt[n + 3]) &&
checkFormatHelper(fmt, n + 4, args...)
// width & precision modifier: "%*.*" with the conversion character at n + 4
: (fmt[n + 1] == '*' && fmt[n + 2] == '.' && fmt[n + 3] == '*') ?
checkWidthAndPrecision(fmt, n + 4, arg, args...)
// width or precision modifier: "%.*" (conversion at n + 3) or "%*" (conversion at n + 2)
: ((fmt[n + 1] == '.' && fmt[n + 2] == '*') || (fmt[n + 1] == '*')) ?
checkWidthOrPrecision(fmt, (fmt[n + 1] == '.' ? n + 3 : n + 2), arg, args...)
// other modifier: skip every modifier character, then check the conversion character
: (isModifier(fmt[n + 1])) ?
FormatSupportedType< typename std::decay<T>::type>::supports(fmt[nextNonModifier(fmt, n + 2)]) &&
checkFormatHelper(fmt, nextNonModifier(fmt, n + 2) + 1, args...)
// no modifier: the conversion character directly follows '%'
: FormatSupportedType< typename std::decay<T>::type>::supports(fmt[n + 1]) &&
checkFormatHelper(fmt, n + 2, args...);
}
/// Entry point: checks the printf-style format string `fmt` against the types
/// of `args`, starting the scan at index 0.
///
/// Only the argument types are inspected, which is what lets the call be used
/// inside a static_assert.
///
/// @return the result of checkFormatHelper for the whole string
template<std::size_t N, typename... Args>
constexpr bool checkFormat(const char (&fmt)[N], const Args&... args)
{
    return checkFormatHelper(fmt, 0, args...);
}
// printing...
/// Recursion terminator: no arguments are left to feed into the formatter.
void add(boost::format& /*f*/)
{
}
/// Feeds `arg` into the formatter `f`, then recurses on the remaining
/// arguments `ts...`.
template<typename T, typename... Ts>
void add(boost::format& f, const T& arg, const Ts&... ts)
{
    // operator% returns the formatter itself, so the result can be passed on.
    add(f % arg, ts...);
}
// LOG(fmt, args...): validates `fmt` against the argument types at compile
// time, then formats with boost::format and writes one line to std::cout.
//
// - `fmt` must be a string literal: checkFormat deduces its length from the
//   array type and is evaluated inside a static_assert.
// - `##__VA_ARGS__` is a GNU extension that removes the preceding comma when no
//   variadic arguments are supplied.
// - The body is wrapped in do { ... } while (false) so that `LOG(...);` is a
//   single statement and stays correct inside an unbraced if/else. Callers
//   must therefore end the invocation with a semicolon.
// - The local formatter has a macro-specific name so that a caller argument
//   named `f` is not captured by it.
#define LOG(fmt, ...) \
    do { \
        static_assert(checkFormat(fmt, ##__VA_ARGS__), "Format is incorrect"); \
        boost::format logFormatter_(fmt); \
        add(logFormatter_, ##__VA_ARGS__); \
        std::cout << logFormatter_.str() << std::endl; \
    } while (false)
// Demo driver: each active LOG call passes the compile-time check and prints
// one line. The commented-out calls are kept as examples of what is rejected
// or unsupported.
int main()
{
// char
LOG("%c", 'x');
// integral
LOG("%d", -123);
LOG("%ld", -123);
LOG("%u", 123u);
LOG("%lu", 123u);
// strings
LOG("%s", "hello world");
{ const char* s = "hello world"; LOG("%s", s); }
{ std::string s = "hello world"; LOG("%s", s); }
{ std::string s = "hello world"; boost::string_ref r(s); LOG("%s", r); }
// floating point
LOG("%f", 1.23);
LOG("%f", 1.23f);
// width / precision
LOG("%02d", 1);
LOG("%.2d", 123);
LOG("% 3s", "hello");
LOG("% 3s", "yo");
LOG("%.3s", "hello");
LOG("%.3s", "yo");
// incorrect format string: the conversion character does not match the
// argument type, so enabling either line fails the static_assert in LOG
// LOG("%f", 1);
// LOG("%d", 1.23);
// not supported by boost::format
// LOG("%*s", 3, "yo");
// LOG("%*d", 3, 12);
// LOG("%.*s", 3, "hello");
// LOG("%.*d", 3, 12345);
// LOG("%*.*s", 3, 3, "hello");
// LOG("%*.*d", 3, 3, 12345);
}
Output:
x
-123
-123
123
123
hello world
hello world
hello world
hello world
1.230000
1.230000
01
123
hello
yo
hel
yo
Output when an invalid format string / argument combination is passed:
prog.cpp: In function 'int main()':
prog.cpp:190:9: error: non-constant condition for static assertion
static_assert(checkFormat(fmt, ##__VA_ARGS__), "Format is incorrect"); \
^
prog.cpp:226:5: note: in expansion of macro 'LOG'
LOG("%f", 1);
^
prog.cpp:226:5: in constexpr expansion of 'checkFormat<3u, {int}>((*"%f"), (* &1))'
prog.cpp:173:45: in constexpr expansion of 'checkFormatHelper<3u, int, {}>((* & fmt), 0u, (* & args#0))'
prog.cpp:166:82: in constexpr expansion of 'FormatSupportedType<int>::supports(((int)fmt[(n + 1u)]))'
prog.cpp:24:67: error: expression '<throw-expression>' is not a constant-expression
? true : throw std::logic_error("invalid fmt for type"); } \
^
prog.cpp:28:1: note: in expansion of macro 'SUPPORTED_TYPE'
SUPPORTED_TYPE(int, "d*");
^
-
Follow-up question – 200_success (1 upvote), commented Mar 26, 2015 at 1:44
1 Answer 1
Overall a nice start.
But it is by no means complete.
The ## symbol joins the previous and the next token.
static_assert(checkFormat(fmt, ##__VA_ARGS__), "Format is incorrect"); \
boost::format f(fmt); \
add(f, ##__VA_ARGS__); \
~~So I am not sure what you are doing with it here. I would remove it completely.~~
@glampert pointed out that `##` used with `__VA_ARGS__` is a GNU extension that drops the preceding comma if the variadic argument list is empty.
Using ternary operator as an if/then/else
I think your use of the ternary operator makes the code less readable.
return
n>= N ?
true
: fmt[n] != '%' ?
checkFormatHelper(fmt, n + 1)
: fmt[n + 1] == '%' ?
checkFormatHelper(fmt, n + 2)
: false;
Is that as readable as:
// Statement form of the ternary chain above (function body only).
// Every branch returns, including the final `false` for a '%' that is not
// followed by a second '%'.
// Note: several statements in a constexpr function require C++14 or later.
if (n >= N) {
    return true;
}
else if (fmt[n] != '%') {
    return checkFormatHelper(fmt, n + 1);
}
else if (fmt[n + 1] == '%') {
    return checkFormatHelper(fmt, n + 2);
}
else {
    return false;
}
I suppose that one is debatable, but your main loop becomes a bit harder to follow. I understand you are probably doing it because of the constexpr requirements.
Format Specifier:
Also the format specifier is a bit more complex than this:
: fmt[n + 1] == '%' ?
checkFormatHelper(fmt, n + 2)
The format specifier is generalized to:
%[<flags>][<width>][.<precision>][<length>]<specifier>
flags := [-+ #0]* // Zero or more
width := <number>|'*' // A number or a '*'
precision := <number>|'*'
length := hh|h|l|ll|j|z|t|L
specifier := d|i|u|o|x|X|f|F|e|E|g|G|a|A|c|s|p|n|%
So you need to use something more like the code in your main helper function (looks like a case for re-use).
Length
You support the long long.
// long-long modifier
: (fmt[n + 1] == 'l' && fmt[n + 2] == 'l') ?
FormatSupportedType< typename std::decay<T>::type >::supports(fmt[n + 3]) &&
checkFormatHelper(fmt, n + 4, args...)
But that is only one of several length specifiers.
Also, you are not checking whether the other optional parts of the format specifier are present; you just assume the conversion character comes directly after the `ll`. You need your checkValidFormats() to be slightly more sophisticated.
Width/Precision
// width & precision modifier
: (fmt[n + 1] == '*' && fmt[n + 2] == '.' && fmt[n + 3] == '*') ?
checkWidthAndPrecision(fmt, n + 4, arg, args...)
// width or precision modifier
: ((fmt[n + 1] == '.' && fmt[n + 2] == '*') || (fmt[n + 1] == '*')) ?
checkWidthOrPrecision(fmt, (fmt[n + 1] == '.' ? n + 3 : n + 2), arg, args...)
Note these values can also be numbers. Which means you should be skipping over the number characters.
But I think your code is incorrect here. Because if the width or precision specifier is *
then you should be expecting another runtime argument. Which means I would expect you to be checking the type of the current argument (which should be some form of integer).
Recommendation
I would split the checkFormatHelper()
into multiple functions that check the different parts of the format specifier.
// Top-level scan while arguments remain: skips ordinary characters and the
// literal "%%", and hands every other '%' to the per-part checkers, starting
// with the flags at n + 1.
// Throws std::logic_error if the format string ends while arguments remain.
template<std::size_t N, class T, class... Ts>
constexpr bool checkFormatHelper(const char (&fmt)[N], std::size_t n, const T& arg, const Ts&... args)
{
    return n >= N
        ? throw std::logic_error("too many arguments for provided format string")
        : fmt[n] == '%'
            ? (fmt[n + 1] == '%'
                   // literal percent character
                   ? checkFormatHelper(fmt, n + 2, arg, args...)
                   // otherwise a format specifier: check each part in turn
                   : checkFormatHelperFlags(fmt, n + 1, arg, args...))
            : checkFormatHelper(fmt, n + 1, arg, args...);
}
// Flags: consumes any run of '-', '+', ' ', '#' and '0', then moves on to the
// width at the first character that is not a flag.
template<std::size_t N, class T, class... Ts>
constexpr bool checkFormatHelperFlags(const char (&fmt)[N], std::size_t n, const T& arg, const Ts&... args)
{
    return (fmt[n] == '-' || fmt[n] == '+' || fmt[n] == ' ' || fmt[n] == '#' || fmt[n] == '0')
        ? checkFormatHelperFlags(fmt, n + 1, arg, args...)
        : checkFormatHelperWidth(fmt, n, arg, args...);
}
template<std::size_t N, class T, class... Ts>
constexpr bool checkFormatHelperWidth(const char (&fmt)[N], std::size_t n, const T& arg, const Ts&... args)
{
return
isDigit(fmt[n]) ?
? checkFormatHelperPrecision(fmt, removeDigits(fmt,n) , arg, args...);
: fmt[n] == '*'
? FormatSupportedType< typename std::decay<T>::type>::supports("i") && checkFormatHelperPrecision(fmt, n+1, args...)
: checkFormatHelperPrecision(fmt, n, arg, args...);
}
template<std::size_t N, class T, class... Ts>
constexpr bool checkFormatHelperPrecision(const char (&fmt)[N], std::size_t n, const T& arg, const Ts&... args)
{
return
fmt[n] == '.'
? isDigit(fmt[n])
? checkFormatHelperLength(fmt, removeDigits(fmt,n) , arg, args...);
: fmt[n] == '*'
? FormatSupportedType< typename std::decay<T>::type>::supports("i") && checkFormatHelperLength(fmt, n+1, args...)
: throw std::logic_error("invalid fmt for type")
: checkFormatHelperLength(fmt, n, arg, args...);
}
// OK now you can do the difficult ones.
// The Length Call may need to pass type information forward to the
// Specifier check.
-
Loki, it looks like the `##` operator combined with `__VA_ARGS__` is a GNU extension (see at the end). – glampert, commented Mar 23, 2015 at 15:47
"Terminator for checkFormatHelper() always returns true": I don't think that's correct – this is the overload for when there are no more arguments. If we reach the end of the format string and there are no more args, it is valid. If we find a single `%` sign, then it's false, because there are no more arguments. That is, it will return false if `fmt[n] == '%' && fmt[n + 1] != '%'` (i.e. we're not printing a literal `%` character). – Steve Lorimer, commented Mar 25, 2015 at 6:26
\$\begingroup\$ Yep. Just re-read that. You are correct. \$\endgroup\$Loki Astari– Loki Astari2015年03月25日 07:42:46 +00:00Commented Mar 25, 2015 at 7:42
-
\$\begingroup\$ @LokiAstari I'm not sure what the right thing to do is: update my question and make your answer a mismatch to the code, or add an answer with the latest iteration of the code based off your comments. I chose the latter. Thanks for your input, it is greatly appreciated! \$\endgroup\$Steve Lorimer– Steve Lorimer2015年03月25日 23:52:00 +00:00Commented Mar 25, 2015 at 23:52
-
\$\begingroup\$ @SteveLorimer: Best to ask a new question \$\endgroup\$Loki Astari– Loki Astari2015年03月26日 00:18:18 +00:00Commented Mar 26, 2015 at 0:18
Explore related questions
See similar questions with these tags.