This was inspired by the question Passing Programs To A Stack Machine Implemented In C++.
I wanted to make it a bit smarter by adding control functions:
instruction | op1 | op2 | op3 | description |
---|---|---|---|---|
jmp |
a | unconditional jump to a |
||
jgt |
a | b | c | jump to c if a > b |
jlt |
a | b | c | jump to c if a < b |
get |
a | push the element at a to the top of the stack |
||
set |
a | b | set the element at b to a |
|
wrt |
a | writes a to the console |
#include <charconv>
#include <functional>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <variant>
#include <vector>
template <class T>
class stackprog
{
public:
static T run(const std::vector<std::string>& arguments)
{
auto instructions = parse(arguments);
return process(instructions);
}
static T run(const std::string& prog)
{
std::vector<std::string> arguments;
std::stringstream ss(prog);
std::string item;
while (getline(ss, item, ' '))
arguments.push_back(item);
return run(arguments);
}
private:
static T pop(std::vector<T>& vec)
{
T retval = vec.back();
vec.pop_back();
return retval;
}
static void jump(std::vector<T>& stack, std::size_t& index, bool gt)
{
std::size_t pos = static_cast<std::size_t>(std::lround(pop(stack)));
T b = pop(stack);
T a = stack.back();
if ((gt && a > b) || (!gt && a < b))
index = pos - 1;
}
typedef std::function<T(T)> func1arg;
typedef std::function<T(T, T)> func2arg;
typedef std::function<void(std::vector<T>&, std::size_t&)> funcctrl;
typedef std::variant<T, func1arg, func2arg, funcctrl> instruction;
static std::vector<instruction> parse(const std::vector<std::string>& args)
{
static std::map<std::string, func1arg> functions1{
{ "abs", [](T a) { return (T)std::fabs(a); } },
{ "cos", [](T a) { return (T)std::cos(a); } },
{ "exp", [](T a) { return (T)std::exp(a); } },
{ "log", [](T a) { return (T)std::log(a); } },
{ "sin", [](T a) { return (T)std::sin(a); } },
{ "tan", [](T a) { return (T)std::tan(a); } },
{ "flr", [](T a) { return (T)std::floor(a); } },
{ "sqrt", [](T a) { return (T)std::sqrt(a); } }
};
static std::map<std::string, func2arg> functions2{
{ "add", [](T a, T b) { return a + b; } },
{ "div", [](T a, T b) { return a / b; } },
{ "mul", [](T a, T b) { return a * b; } },
{ "sub", [](T a, T b) { return a - b; } },
{ "pow", [](T a, T b) { return (T)std::pow(a, b); } },
{ "mod", [](T a, T b) { return (T)std::fmod(a, b); } }
};
static std::map<std::string, funcctrl> functionsc{
{ "jmp", [](std::vector<T>& stack, std::size_t& index) { index = static_cast<std::size_t>(std::lround(pop(stack))) - 1; } },
{ "jgt", [](std::vector<T>& stack, std::size_t& index) { jump(stack, index, true); } },
{ "jlt", [](std::vector<T>& stack, std::size_t& index) { jump(stack, index, false); } },
{ "get", [](std::vector<T>& stack, std::size_t& index) { std::size_t pos = static_cast<std::size_t>(std::lround(pop(stack))); stack.emplace_back(stack[pos]); } },
{ "set", [](std::vector<T>& stack, std::size_t& index) { std::size_t pos = static_cast<std::size_t>(std::lround(pop(stack))); stack[pos] = pop(stack); } },
{ "wrt", [](std::vector<T>& stack, std::size_t& index) { std::cout << pop(stack); } },
};
std::vector<instruction> instructions;
for (auto arg : args)
{
T number;
auto ret = std::from_chars(arg.data(), arg.data() + arg.size(), number);
if (ret.ec == std::errc() && ret.ptr == arg.data() + arg.size())
{
instructions.emplace_back(number);
continue;
}
auto func1 = functions1.find(arg);
if (func1 != functions1.end())
{
instructions.emplace_back(func1->second);
continue;
}
auto func2 = functions2.find(arg);
if (func2 != functions2.end())
{
instructions.emplace_back(func2->second);
continue;
}
auto funcc = functionsc.find(arg);
if (funcc != functionsc.end())
{
instructions.emplace_back(funcc->second);
continue;
}
std::ostringstream oss;
oss << "Error: invalid instruction: " << arg;
throw std::runtime_error(oss.str());
}
return instructions;
}
static T process(const std::vector<instruction>& instructions)
{
std::vector<T> stack;
stack.reserve(instructions.size());
for (size_t i = 0; i < instructions.size(); i++)
{
const auto& inst = instructions[i];
switch (inst.index())
{
case 0:
stack.emplace_back(std::get<T>(inst));
break;
case 1:
stack.emplace_back(std::get<func1arg>(inst)(pop(stack)));
break;
case 2:
stack.emplace_back(std::get<func2arg>(inst)(pop(stack), pop(stack)));
break;
case 3:
std::get<funcctrl>(inst)(stack, i);
break;
}
}
return stack.empty() ? 0 : stack.back();
}
};
int main(int argc, char* argv[])
{
if (argc == 1)
{
// simple loop, expected result: 10000000
std::cout << stackprog<int>::run("0 1 add 1000000 1 jlt") << '\n';
// hypot, expected result: 5
std::cout << stackprog<double>::run("3 2 pow 4 2 pow add sqrt") << '\n';
// calculating pi, expected result: pi to the fifth digit
std::cout << stackprog<double>::run("0 -1 1 get 2 add 1 set 4 1 get div 0 get add 0 set 0 get 1 get 2 add 1 set 4 1 get div sub 0 set 1 get 1e6 4 jlt 0 get") << '\n';
// Hello World, expected result: Hello, World!
std::cout << stackprog<char>::run("72 wrt 101 wrt 108 wrt 108 wrt 111 wrt 44 wrt 32 wrt 87 wrt 111 wrt 114 wrt 108 wrt 100 wrt 33");
}
else
{
// execute from command line arguments
try
{
std::cout << stackprog<double>::run(std::vector<std::string>(argv + 1, argv + argc));
}
catch (std::exception err)
{
std::cerr << err.what();
return 1;
}
}
}
1 Answer 1
Security / error handling:
I don't think it's a good idea to allow the user to crash the program, or (worse) achieve undefined behavior when running the interpreter. This means we have to check that stack indices are valid before we use them, that we don't try to pop from an empty stack, and that the stack has only one item on it at the end.
We can either check manually, and throw
, or use std::vector::at()
.
The rest of this review is mainly nitpicking.
Unnecessary string copies:
(Since this is tagged performance ̄\_(ツ)_/ ̄
)
static T run(const std::string& prog) { std::vector<std::string> arguments; std::stringstream ss(prog); // copy of entire input string std::string item; while (getline(ss, item, ' ')) // copy of every argument arguments.push_back(item); // copy of every argument (again) return run(arguments); }
Honestly this is a perfectly normal way of doing it, (削除) but streams are awful and should be avoided (削除ここまで) but we could use a vector of std::string_view
for the arguments, and write our own getline
function to split the string, and thus avoid making any copies at all. Hopefully someday the standards committee will get around to fixing this bit of the standard library, but until then we have to do it ourselves.
I think it would be reasonable to parse directly into the instructions, instead of creating an intermediate vector. (Though I guess that would require concatenating the command line arguments).
static std::map<std::string, func1arg> functions1{ static std::map<std::string, func2arg> functions2{ static std::map<std::string, funcctrl> functionsc{
We could use std::string_view
for the keys here.
The maps could also be const
.
... for (auto arg : args) // unnecessary copy of every arg
It looks like could use auto const& arg
here.
std::ostringstream oss; oss << "Error: invalid instruction: " << arg; throw std::runtime_error(oss.str());
arg
is already a std::string
, so we could just throw std::runtime_error("Error: invalid instruction: " + arg);
. Even if it wasn't, we could use std::to_string(arg)
instead of a(削除) n evil (削除ここまで) stringstream
.
std::vector<std::string>(argv + 1, argv + argc)
Again, if we used std::vector<std::string_view>
we can avoid the copies here.
Modern if
conditions:
auto func1 = functions1.find(arg); if (func1 != functions1.end()) { instructions.emplace_back(func1->second); continue; }
Can be written as:
if (auto f1 = functions1.find(arg); f1 != functions1.end())
{
instructions.push_back(f1->second);
continue;
}
Which keeps f1
in as small a scope as possible.
Likewise:
T number; auto ret = std::from_chars(arg.data(), arg.data() + arg.size(), number); if (ret.ec == std::errc() && ret.ptr == arg.data() + arg.size()) { instructions.emplace_back(number); continue; }
This is definitely complex enough to factor out into a separate function: std::optional<T> to_number(std::string const& str);
. Then we could do:
if (auto n = to_number(arg); n.has_value())
{
instructions.push_back(n.value());
continue;
}
Magic numbers vs std::visit
:
for (size_t i = 0; i < instructions.size(); i++) { const auto& inst = instructions[i]; switch (inst.index()) { case 0: stack.emplace_back(std::get<T>(inst)); break; case 1: stack.emplace_back(std::get<func1arg>(inst)(pop(stack))); break; case 2: stack.emplace_back(std::get<func2arg>(inst)(pop(stack), pop(stack))); break; case 3: std::get<funcctrl>(inst)(stack, i); break; } }
Using magic number indices like this is a bit... icky.
To use std::visit
, we can either declare a visitor struct inside the function:
struct visitor
{
std::size_t& i;
std::vector<T>& stack;
void operator()(T t) { stack.push_back(t); }
void operator()(func1arg f1) { stack.push_back(f1(pop(stack))); }
void operator()(func2arg f2) { stack.push_back(f2(pop(stack), pop(stack))); }
void operator()(funcctrl fc) { fc(stack, i); }
};
for (size_t i = 0; i < instructions.size(); i++)
std::visit(visitor{ i, stack }, instructions[i]);
Which is a bit messy, because we only use the index sometimes.
Or we can use the inheritance trick from here with some lambdas:
template<class... Ts> struct overloaded : Ts... { using Ts::operator()...; };
template<class... Ts> overloaded(Ts...) -> overloaded<Ts...>;
...
for (size_t i = 0; i < instructions.size(); i++)
{
std::visit(
overloaded
{
[&] (T t) { stack.push_back(t); },
[&] (func1arg f1) { stack.push_back(f1(pop(stack))); },
[&] (func2arg f2) { stack.push_back(f2(pop(stack), pop(stack))); },
[&] (funcctrl fc) { fc(stack, i); }
},
instructions[i]);
}
Neither of these methods is perfect, but they're perhaps a little clearer.
Popping:
static_cast<std::size_t>(std::lround(pop(stack)))
There are enough of these around that it would be worth factoring out into a pop_stack_index
function.
Magic numbers:
catch (std::exception err) { std::cerr << err.what() << std::endl; return 1; }
We could use EXIT_FAILURE
from <cstdlib>
.
-
1\$\begingroup\$ Thanks for the review and good tips. Do you have any thoughts about wrapping
stackprog
in a class? I initially tried to do it with stand-alone functions in astackprog
namespace, but made a huge mess of the templates. \$\endgroup\$jdt– jdt2021年11月14日 19:46:59 +00:00Commented Nov 14, 2021 at 19:46 -
2\$\begingroup\$ I was actually going to say "it's all static functions and no state, so it could be a namespace", but then I saw that it makes the templates easier. You could maybe make a couple of free functions:
template<class T> run(...);
that callstackprog<T>::run()
and then the user wouldn't need to seestackprog
at all. \$\endgroup\$user673679– user6736792021年11月14日 19:53:03 +00:00Commented Nov 14, 2021 at 19:53 -
1\$\begingroup\$ "if we used
std::vector<std::string_view>
we can avoid the copies" It would still make copies of the pointers and call the equivalent ofstrlen()
for every argument. You could makerun()
take astd::span<const char *> args
, so you can call it like so:run({argv, argv + argc})
, and inparse()
do afor (std::string_view arg: args) ...
. \$\endgroup\$G. Sliepen– G. Sliepen2021年11月14日 21:26:04 +00:00Commented Nov 14, 2021 at 21:26 -
1\$\begingroup\$ @G.Sliepen The advantage of using
string_view
instead of aconst char*
is that you don't have to re-check the length. The argument you use when looking up something in the map should already be astring
orstring_view
, not (only) the pointer to the beginning of the split-out argument. \$\endgroup\$JDługosz– JDługosz2021年11月15日 15:45:35 +00:00Commented Nov 15, 2021 at 15:45