Motivation: Partially for fun / learning, but also so I can roll out a custom JSON-like file format for a fighting game I am writing. There are two caveats: you cannot have repeated keys (requires std::unordered_multimap
) and the keys are not in order (requires insertion order std::vector
on the side, or some external boost lib probably). I am mainly looking for critique on my level of comments and ability to read my code, but everything else I am of course welcome to hear.
#pragma once
#include <list>
#include <map>
#include <string>
#include <variant>
#include <vector>
#include <fstream>
#include <iostream>
#include <sstream>
namespace json {
class Value;
// six json data types
using null_t = std::nullptr_t;
using bool_t = bool;
using number_t = std::double_t;
using string_t = std::string;
using array_t = std::vector<Value>;
using object_t = std::map<string_t, Value>;
using aggregate_t = std::variant<
null_t, bool_t, number_t,
string_t, array_t, object_t>;
class Value : protected aggregate_t {
public:
using aggregate_t::variant;
// removes spurious E0291
Value() = default;
// converts int into double rather than bool
Value(int integer) : aggregate_t(static_cast<double>(integer)) {}
// converts c_string (pointer) into string rather than bool
Value(const char* c_string) : aggregate_t(std::string(c_string)) {}
public:
auto operator[](const string_t& key) -> Value& {
// transform into object if null
if (std::get_if<null_t>(this))
*this = object_t();
return std::get<object_t>(*this)[key];
}
auto operator[](std::size_t key) -> Value& {
// transform into array if null
if (std::get_if<null_t>(this))
*this = array_t();
if (key >= std::get<array_t>(*this).size())
std::get<array_t>(*this).resize(key + 1);
return std::get<array_t>(*this)[key];
}
auto save(std::ostream& stream, std::string prefix = "") -> std::ostream& {
static const std::string SPACING = " "; // "\t"; // " ";
// depending on the type, write to correct value with format to stream
std::visit([&stream, &prefix](auto&& value) {
using namespace std;
using T = decay_t<decltype(value)>;
if constexpr (is_same_v<T, nullptr_t>)
stream << "null";
if constexpr (is_same_v<T, bool_t>)
stream << (value ? "true" : "false");
else if constexpr (is_same_v<T, double_t>)
stream << value;
else if constexpr (is_same_v<T, string>)
stream << '"' << value << '"';
else if constexpr (is_same_v<T, array_t>) {
stream << "[\n";
auto [indent, remaining] = make_tuple(prefix + SPACING, value.size());
// for every json value, indent and print to stream
for (auto& json : value)
json.save(stream << indent, indent)
// if jsons remaining (not last), append comma
<< (--remaining ? ",\n" : "\n");
stream << prefix << "]";
}
else if constexpr (is_same_v<T, object_t>) {
stream << "{\n";
auto [indent, remaining] = make_tuple(prefix + SPACING, value.size());
// for every json value, indent with key and print to stream
for (auto& [key, json] : value)
json.save(stream << indent << '"' << key << "\" : ", indent)
// if jsons remaining (not last), append comma
<< (--remaining ? ",\n" : "\n");
stream << prefix << "}";
}
}, *static_cast<aggregate_t*>(this));
return stream;
}
auto load(std::istream& stream) -> std::istream& {
using namespace std;
switch ((stream >> ws).peek()) {
case '"': {
// get word surrounded by "
stringbuf buffer;
stream.ignore(1)
.get(buffer, '"')
.ignore(1);
*this = buffer.str();
} break;
case '[': {
array_t array;
for (stream.ignore(1); (stream >> ws).peek() != ']';)
// load child json and consume comma if available
if ((array.emplace_back().load(stream) >> ws).peek() == ',')
stream.ignore(1);
stream.ignore(1);
*this = move(array);
} break;
case '{': {
object_t object;
for (stream.ignore(1); (stream >> ws).peek() != '}';) {
// get word surrounded by "
stringbuf buffer;
stream.ignore(numeric_limits<streamsize>::max(), '"')
.get(buffer, '"')
.ignore(numeric_limits<streamsize>::max(), ':');
// load child json and consume comma if available
if ((object[buffer.str()].load(stream) >> ws).peek() == ',')
stream.ignore(1);
}
stream.ignore(1);
*this = move(object);
} break;
default: {
if (isdigit(stream.peek()) || stream.peek() == '.') {
double_t number;
stream >> number;
*this = number;
}
else if (isalpha(stream.peek())) {
// get alphabetic word
string word;
for (; isalpha(stream.peek()); stream.ignore())
word.push_back(stream.peek());
// set value to look-up table's value
static auto keyword_lut = map<string_view, Value>{
{"true", true}, {"false", false}, {"null", nullptr}};
*this = keyword_lut[word];
}
else
*this = nullptr;
} break;
}
return stream;
}
auto save_to_path(std::string_view file_path) -> void {
auto file = std::ofstream(std::string(file_path));
save(file);
}
auto load_from_path(std::string_view file_path) -> void {
auto file = std::ifstream(std::string(file_path));
load(file);
}
static void test() {
std::stringstream ss;
{
json::Value value;
auto& employee = value["employee"];
employee["name"] = "bob";
employee["age"] = 21;
employee["friends"][0] = "alice";
employee["friends"][1] = "billy";
employee["weight"] = 140.0;
value.save(ss);
}
std::cout << ss.str() << "\n\n";
{
auto example = std::stringstream(R"(
{
"firstName": "John",
"lastName": "Smith",
"isAlive": true,
"age": 27,
"address": {
"streetAddress": "21 2nd Street",
"city": "New York",
"state": "NY",
"postalCode": "10021-3100"
},
"phoneNumbers": [
{
"type": "home",
"number": "212 555-1234"
},
{
"type": "office",
"number": "646 555-4567"
},
{
"type": "mobile",
"number": "123 456-7890"
}
],
"children": [],
"spouse": null
})");
json::Value value;
value.load(example);
ss.clear();
value.save(ss);
}
std::cout << ss.str() << "\n\n";
}
};
}
int main() {
json::Value::test();
return getchar();
}
1 Answer 1
Let's go through the code and see what can be improved.
#pragma once
This shouldn't be in a non-header.
#include <list> #include <map> #include <string> #include <variant> #include <vector> #include <fstream> #include <iostream> #include <sstream>
Sort the include directives in alphabetical order.
namespace json { class Value; // six json data types using null_t = std::nullptr_t; using bool_t = bool; using number_t = std::double_t; using string_t = std::string; using array_t = std::vector<Value>; using object_t = std::map<string_t, Value>; using aggregate_t = std::variant< null_t, bool_t, number_t, string_t, array_t, object_t>;
json
is a very common name used by many people, leading to name clashes. Think of a more unique name. Maybe unicorn5838::json
?
I don't see a reason to use std::nullptr_t
for null_t
. std::nullptr_t
is a null pointer literal and can implicitly convert to pointers. Is that plausible? You can use std::monostate
instead, or just struct null_t { };
.
You are mixing std::double_t
and double
. My advice is to just use double
. Also, consistently use number_t
after the alias declaration.
class Value : protected aggregate_t { public: using aggregate_t::variant; // removes spurious E0291 Value() = default; // converts int into double rather than bool Value(int integer) : aggregate_t(static_cast<double>(integer)) {} // converts c_string (pointer) into string rather than bool Value(const char* c_string) : aggregate_t(std::string(c_string)) {}
Hmm ... Protected inheritance? Why do you need it in this case? (You can address this question with a comment if you have a good reason.)
Inheriting the constructors of std::variant
doesn't seem to be a good choice here. I see your effort in fixing the problems, but just providing your own constructors seems easier.
public: auto operator[](const string_t& key) -> Value& { // transform into object if null if (std::get_if<null_t>(this)) *this = object_t(); return std::get<object_t>(*this)[key]; } auto operator[](std::size_t key) -> Value& { // transform into array if null if (std::get_if<null_t>(this)) *this = array_t(); if (key >= std::get<array_t>(*this).size()) std::get<array_t>(*this).resize(key + 1); return std::get<array_t>(*this)[key]; }
Don't use multiple public:
labels.
Do not use the trailing return type syntax unless necessary. (Yeah, I know some people advocate always using a trailing return type, but it arguably makes the code more verbose.)
Your operator[]
automatically constructs the element if not existent, much like map::operator[]
but not vector::operator[]
. I'm not sure whether this behavior is intuitive enough to justify itself, but anyway ...
*this = object_t();
should be emplace<object_t>();
to prevent an unnecessary move construction.
You do std::get<array_t>(*this)
three times, and the complex code for getting the value will be run three times. Instead, use a reference:
auto& arr = std::get<array_t>(*this);
if (key >= arr.size())
arr.resize(key + 1);
return arr[key];
Also note that key + 1
may overflow (well, probably not a real problem).
auto save(std::ostream& stream, std::string prefix = "") -> std::ostream& { static const std::string SPACING = " "; // "\t"; // " "; // depending on the type, write to correct value with format to stream std::visit([&stream, &prefix](auto&& value) { using namespace std; using T = decay_t<decltype(value)>; if constexpr (is_same_v<T, nullptr_t>) stream << "null"; if constexpr (is_same_v<T, bool_t>) stream << (value ? "true" : "false"); else if constexpr (is_same_v<T, double_t>) stream << value; else if constexpr (is_same_v<T, string>) stream << '"' << value << '"'; else if constexpr (is_same_v<T, array_t>) { stream << "[\n"; auto [indent, remaining] = make_tuple(prefix + SPACING, value.size()); // for every json value, indent and print to stream for (auto& json : value) json.save(stream << indent, indent) // if jsons remaining (not last), append comma << (--remaining ? ",\n" : "\n"); stream << prefix << "]"; } else if constexpr (is_same_v<T, object_t>) { stream << "{\n"; auto [indent, remaining] = make_tuple(prefix + SPACING, value.size()); // for every json value, indent with key and print to stream for (auto& [key, json] : value) json.save(stream << indent << '"' << key << "\" : ", indent) // if jsons remaining (not last), append comma << (--remaining ? ",\n" : "\n"); stream << prefix << "}"; } }, *static_cast<aggregate_t*>(this)); return stream; }
Use null_t
and string_t
, not nullptr_t
and string
. Use const auto&
instead of auto&&
if you don't need the universal reference semantics. prefix
should be std::string_view
instead of by-value std::string
. The spacing should also be an argument instead of hard coded.
This is a very long function. The long if constexpr chain makes the code much less readable. Use overload resolution to break it down:
// somewhere
struct formatter {
std::ostream& os;
std::string_view prefix;
std::string_view indent;
void operator()(null_t) const;
void operator()(bool_t) const;
// etc.
};
then you can just do
std::visit(formatter{os, prefix, indent}, static_cast<aggregate_t&>(*this));
The string streaming should use std::quoted
to properly handle escaping.
Don't do this:
auto [indent, remaining] = make_tuple(prefix + SPACING, value.size());
It incurs a lot of overhead, both on performance and readability.
auto load(std::istream& stream) -> std::istream& { using namespace std; switch ((stream >> ws).peek()) { case '"': { // get word surrounded by " stringbuf buffer; stream.ignore(1) .get(buffer, '"') .ignore(1); *this = buffer.str(); } break; case '[': { array_t array; for (stream.ignore(1); (stream >> ws).peek() != ']';) // load child json and consume comma if available if ((array.emplace_back().load(stream) >> ws).peek() == ',') stream.ignore(1); stream.ignore(1); *this = move(array); } break; case '{': { object_t object; for (stream.ignore(1); (stream >> ws).peek() != '}';) { // get word surrounded by " stringbuf buffer; stream.ignore(numeric_limits<streamsize>::max(), '"') .get(buffer, '"') .ignore(numeric_limits<streamsize>::max(), ':'); // load child json and consume comma if available if ((object[buffer.str()].load(stream) >> ws).peek() == ',') stream.ignore(1); } stream.ignore(1); *this = move(object); } break; default: { if (isdigit(stream.peek()) || stream.peek() == '.') { double_t number; stream >> number; *this = number; } else if (isalpha(stream.peek())) { // get alphabetic word string word; for (; isalpha(stream.peek()); stream.ignore()) word.push_back(stream.peek()); // set value to look-up table's value static auto keyword_lut = map<string_view, Value>{ {"true", true}, {"false", false}, {"null", nullptr}}; *this = keyword_lut[word]; } else *this = nullptr; } break; } return stream; }
Don't use stringbuf
. It's a low level functionality. Use std::quoted
instead:
case '"': {
std::string str;
stream >> std::quoted(str);
emplace<string_t>(str);
break;
}
.ignore(1)
is slower than .get()
without aggressive optimization.
The table should be const
, and .at
(which looks up existing elements) should be used instead of []
(which creates new elements and cannot be used on const
maps). Using a map
for three strings is also an overkill and will introduce overhead.
auto save_to_path(std::string_view file_path) -> void { auto file = std::ofstream(std::string(file_path)); save(file); } auto load_from_path(std::string_view file_path) -> void { auto file = std::ifstream(std::string(file_path)); load(file); }
Please don't "always use auto
". I know it is suggested by Herb Sutter (right?), but it is really unidiomatic and distracting.
-
\$\begingroup\$ I've gone ahead and made all the corrections except the follow (please refute me if I am wrong):
#pragma once
it is a header file,protected aggregate_t
because a json IS-A variant, not HAS-A variant, multiplepublic:
labels separates my ctors / API and trailing returns is my coding style (I would change depending on my org). \$\endgroup\$Saxpy– Saxpy2019年10月08日 19:18:04 +00:00Commented Oct 8, 2019 at 19:18 -
\$\begingroup\$ ah two more notes: I can't use
std::string_view
forprefix
because I require append operations to tabify my json -> file operation, second I use astd::map
lookup table even if it is overkill because its easy to read and I will be adding my own custom keywords for my fighting game. \$\endgroup\$Saxpy– Saxpy2019年10月08日 19:28:59 +00:00Commented Oct 8, 2019 at 19:28 -
1\$\begingroup\$ @Saxpy "Is-a" should be indicated by private inheritance, not protected inheritance. Don't abuse multiple
public:
labels, which alter semantics, for separating declaration blocks; using a comment like// constructors
instead is much clearer. Always trailing return is a, well, unpopular, and illogical style (coding styles exist to improve readability).string_view
provides the operations you want. Keeping the map is understandable though, but at least make itconst
. \$\endgroup\$L. F.– L. F.2019年10月09日 09:47:17 +00:00Commented Oct 9, 2019 at 9:47 -
\$\begingroup\$ I see, I'll go ahead and change it to private inheritance. Also I'll take note on the
public:
blocks into// constructors
. I'd still like to keep trailing returns at this is my personal project and that's the style I've committed too and prefer on my own.string_view
does not offer append operations though, how could I refactor this? Thestd::map
will be const. \$\endgroup\$Saxpy– Saxpy2019年10月09日 16:55:53 +00:00Commented Oct 9, 2019 at 16:55 -
1\$\begingroup\$ @Saxpy Oops, I didn’t realize that the string view ctor is explicit. Sorry, I’ll reconsider the recommendation \$\endgroup\$L. F.– L. F.2019年10月12日 09:03:26 +00:00Commented Oct 12, 2019 at 9:03