Simple stack based interpreter

Question 1

This was inspired by the question Passing Programs To A Stack Machine Implemented In C++.

I wanted to make it a bit smarter by adding control functions:

instruction	op1	op2	op3	description
`jmp`	a	unconditional jump to `a`
`jgt`	a	b	c	jump to `c` if `a > b`
`jlt`	a	b	c	jump to `c` if `a < b`
`get`	a	push the element at `a` to the top of the stack
`set`	a	b	set the element at `b` to `a`
`wrt`	a	writes `a` to the console

#include <charconv>
#include <functional>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <variant>
#include <vector>
template <class T>
class stackprog
{
public:
 static T run(const std::vector<std::string>& arguments)
 {
 auto instructions = parse(arguments);
 return process(instructions);
 }
 static T run(const std::string& prog)
 {
 std::vector<std::string> arguments;
 std::stringstream ss(prog);
 std::string item;
 while (getline(ss, item, ' '))
 arguments.push_back(item);
 return run(arguments);
 }
private:
 static T pop(std::vector<T>& vec)
 {
 T retval = vec.back();
 vec.pop_back();
 return retval;
 }
 static void jump(std::vector<T>& stack, std::size_t& index, bool gt)
 {
 std::size_t pos = static_cast<std::size_t>(std::lround(pop(stack)));
 T b = pop(stack);
 T a = stack.back();
 if ((gt && a > b) || (!gt && a < b))
 index = pos - 1;
 }
 typedef std::function<T(T)> func1arg;
 typedef std::function<T(T, T)> func2arg;
 typedef std::function<void(std::vector<T>&, std::size_t&)> funcctrl;
 typedef std::variant<T, func1arg, func2arg, funcctrl> instruction;
 static std::vector<instruction> parse(const std::vector<std::string>& args)
 {
 static std::map<std::string, func1arg> functions1{
 { "abs", [](T a) { return (T)std::fabs(a); } },
 { "cos", [](T a) { return (T)std::cos(a); } },
 { "exp", [](T a) { return (T)std::exp(a); } },
 { "log", [](T a) { return (T)std::log(a); } },
 { "sin", [](T a) { return (T)std::sin(a); } },
 { "tan", [](T a) { return (T)std::tan(a); } },
 { "flr", [](T a) { return (T)std::floor(a); } },
 { "sqrt", [](T a) { return (T)std::sqrt(a); } }
 };
 static std::map<std::string, func2arg> functions2{
 { "add", [](T a, T b) { return a + b; } },
 { "div", [](T a, T b) { return a / b; } },
 { "mul", [](T a, T b) { return a * b; } },
 { "sub", [](T a, T b) { return a - b; } },
 { "pow", [](T a, T b) { return (T)std::pow(a, b); } },
 { "mod", [](T a, T b) { return (T)std::fmod(a, b); } }
 };
 static std::map<std::string, funcctrl> functionsc{
 { "jmp", [](std::vector<T>& stack, std::size_t& index) { index = static_cast<std::size_t>(std::lround(pop(stack))) - 1; } },
 { "jgt", [](std::vector<T>& stack, std::size_t& index) { jump(stack, index, true); } },
 { "jlt", [](std::vector<T>& stack, std::size_t& index) { jump(stack, index, false); } },
 { "get", [](std::vector<T>& stack, std::size_t& index) { std::size_t pos = static_cast<std::size_t>(std::lround(pop(stack))); stack.emplace_back(stack[pos]); } },
 { "set", [](std::vector<T>& stack, std::size_t& index) { std::size_t pos = static_cast<std::size_t>(std::lround(pop(stack))); stack[pos] = pop(stack); } },
 { "wrt", [](std::vector<T>& stack, std::size_t& index) { std::cout << pop(stack); } },
 };
 std::vector<instruction> instructions;
 for (auto arg : args)
 {
 T number;
 auto ret = std::from_chars(arg.data(), arg.data() + arg.size(), number);
 if (ret.ec == std::errc() && ret.ptr == arg.data() + arg.size())
 {
 instructions.emplace_back(number);
 continue;
 }
 auto func1 = functions1.find(arg);
 if (func1 != functions1.end())
 {
 instructions.emplace_back(func1->second);
 continue;
 }
 auto func2 = functions2.find(arg);
 if (func2 != functions2.end())
 {
 instructions.emplace_back(func2->second);
 continue;
 }
 auto funcc = functionsc.find(arg);
 if (funcc != functionsc.end())
 {
 instructions.emplace_back(funcc->second);
 continue;
 }
 std::ostringstream oss;
 oss << "Error: invalid instruction: " << arg;
 throw std::runtime_error(oss.str());
 }
 return instructions;
 }
 static T process(const std::vector<instruction>& instructions)
 {
 std::vector<T> stack;
 stack.reserve(instructions.size());
 for (size_t i = 0; i < instructions.size(); i++)
 {
 const auto& inst = instructions[i];
 switch (inst.index())
 {
 case 0:
 stack.emplace_back(std::get<T>(inst));
 break;
 case 1:
 stack.emplace_back(std::get<func1arg>(inst)(pop(stack)));
 break;
 case 2:
 stack.emplace_back(std::get<func2arg>(inst)(pop(stack), pop(stack)));
 break;
 case 3:
 std::get<funcctrl>(inst)(stack, i);
 break;
 }
 }
 return stack.empty() ? 0 : stack.back();
 }
};
int main(int argc, char* argv[])
{
 if (argc == 1)
 {
 // simple loop, expected result: 10000000
 std::cout << stackprog<int>::run("0 1 add 1000000 1 jlt") << '\n';
 // hypot, expected result: 5
 std::cout << stackprog<double>::run("3 2 pow 4 2 pow add sqrt") << '\n';
 // calculating pi, expected result: pi to the fifth digit
 std::cout << stackprog<double>::run("0 -1 1 get 2 add 1 set 4 1 get div 0 get add 0 set 0 get 1 get 2 add 1 set 4 1 get div sub 0 set 1 get 1e6 4 jlt 0 get") << '\n';
 // Hello World, expected result: Hello, World!
 std::cout << stackprog<char>::run("72 wrt 101 wrt 108 wrt 108 wrt 111 wrt 44 wrt 32 wrt 87 wrt 111 wrt 114 wrt 108 wrt 100 wrt 33");
 }
 else
 {
 // execute from command line arguments
 try
 {
 std::cout << stackprog<double>::run(std::vector<std::string>(argv + 1, argv + argc));
 }
 catch (std::exception err)
 {
 std::cerr << err.what();
 return 1;
 }
 }
}

Question 2

Security / error handling:

I don't think it's a good idea to allow the user to crash the program, or (worse) achieve undefined behavior when running the interpreter. This means we have to check that stack indices are valid before we use them, that we don't try to pop from an empty stack, and that the stack has only one item on it at the end.

We can either check manually, and throw, or use std::vector::at().

The rest of this review is mainly nitpicking.

Unnecessary string copies:

(Since this is tagged performance ̄\_(ツ)_/ ̄)

static T run(const std::string& prog)
{
 std::vector<std::string> arguments;
 std::stringstream ss(prog); // copy of entire input string
 std::string item;
 while (getline(ss, item, ' ')) // copy of every argument
 arguments.push_back(item); // copy of every argument (again)
 return run(arguments);
}

Honestly this is a perfectly normal way of doing it, ~~(削除) but streams are awful and should be avoided (削除ここまで)~~ but we could use a vector of std::string_view for the arguments, and write our own getline function to split the string, and thus avoid making any copies at all. Hopefully someday the standards committee will get around to fixing this bit of the standard library, but until then we have to do it ourselves.

I think it would be reasonable to parse directly into the instructions, instead of creating an intermediate vector. (Though I guess that would require concatenating the command line arguments).

 static std::map<std::string, func1arg> functions1{
 static std::map<std::string, func2arg> functions2{
 static std::map<std::string, funcctrl> functionsc{

We could use std::string_view for the keys here.

The maps could also be const.

 ...
 for (auto arg : args) // unnecessary copy of every arg

It looks like could use auto const& arg here.

 std::ostringstream oss;
 oss << "Error: invalid instruction: " << arg;
 throw std::runtime_error(oss.str());

arg is already a std::string, so we could just throw std::runtime_error("Error: invalid instruction: " + arg);. Even if it wasn't, we could use std::to_string(arg) instead of a~~(削除) n evil (削除ここまで)~~ stringstream.

std::vector<std::string>(argv + 1, argv + argc)

Again, if we used std::vector<std::string_view> we can avoid the copies here.

Modern `if` conditions:

 auto func1 = functions1.find(arg);
 if (func1 != functions1.end())
 {
 instructions.emplace_back(func1->second);
 continue;
 }

Can be written as:

 if (auto f1 = functions1.find(arg); f1 != functions1.end())
 {
 instructions.push_back(f1->second);
 continue;
 }

Which keeps f1 in as small a scope as possible.

Likewise:

 T number;
 auto ret = std::from_chars(arg.data(), arg.data() + arg.size(), number);
 if (ret.ec == std::errc() && ret.ptr == arg.data() + arg.size())
 {
 instructions.emplace_back(number);
 continue;
 }

This is definitely complex enough to factor out into a separate function: std::optional<T> to_number(std::string const& str);. Then we could do:

 if (auto n = to_number(arg); n.has_value())
 {
 instructions.push_back(n.value());
 continue;
 }

Magic numbers vs `std::visit`:

 for (size_t i = 0; i < instructions.size(); i++)
 {
 const auto& inst = instructions[i];
 switch (inst.index())
 {
 case 0:
 stack.emplace_back(std::get<T>(inst));
 break;
 case 1:
 stack.emplace_back(std::get<func1arg>(inst)(pop(stack)));
 break;
 case 2:
 stack.emplace_back(std::get<func2arg>(inst)(pop(stack), pop(stack)));
 break;
 case 3:
 std::get<funcctrl>(inst)(stack, i);
 break;
 }
 }

Using magic number indices like this is a bit... icky.

To use std::visit, we can either declare a visitor struct inside the function:

 struct visitor
 {
 std::size_t& i;
 std::vector<T>& stack;
 void operator()(T t) { stack.push_back(t); }
 void operator()(func1arg f1) { stack.push_back(f1(pop(stack))); }
 void operator()(func2arg f2) { stack.push_back(f2(pop(stack), pop(stack))); }
 void operator()(funcctrl fc) { fc(stack, i); }
 };
 for (size_t i = 0; i < instructions.size(); i++)
 std::visit(visitor{ i, stack }, instructions[i]);

Which is a bit messy, because we only use the index sometimes.

Or we can use the inheritance trick from here with some lambdas:

template<class... Ts> struct overloaded : Ts... { using Ts::operator()...; };
template<class... Ts> overloaded(Ts...) -> overloaded<Ts...>;
...
 for (size_t i = 0; i < instructions.size(); i++)
 {
 std::visit(
 overloaded
 {
 [&] (T t) { stack.push_back(t); },
 [&] (func1arg f1) { stack.push_back(f1(pop(stack))); },
 [&] (func2arg f2) { stack.push_back(f2(pop(stack), pop(stack))); },
 [&] (funcctrl fc) { fc(stack, i); } 
 },
 instructions[i]);
 }

Neither of these methods is perfect, but they're perhaps a little clearer.

Popping:

static_cast<std::size_t>(std::lround(pop(stack)))

There are enough of these around that it would be worth factoring out into a pop_stack_index function.

Magic numbers:

 catch (std::exception err)
 {
 std::cerr << err.what() << std::endl;
 return 1;
 }

We could use EXIT_FAILURE from <cstdlib>.

Question 3

Thanks for the review and good tips. Do you have any thoughts about wrapping stackprog in a class? I initially tried to do it with stand-alone functions in a stackprog namespace, but made a huge mess of the templates.

Question 4

I was actually going to say "it's all static functions and no state, so it could be a namespace", but then I saw that it makes the templates easier. You could maybe make a couple of free functions: template<class T> run(...); that call stackprog<T>::run() and then the user wouldn't need to see stackprog at all.

Question 5

"if we used std::vector<std::string_view> we can avoid the copies" It would still make copies of the pointers and call the equivalent of strlen() for every argument. You could make run() take a std::span<const char *> args, so you can call it like so: run({argv, argv + argc}), and in parse() do a for (std::string_view arg: args) ....

Question 6

@G.Sliepen The advantage of using string_view instead of a const char* is that you don't have to re-check the length. The argument you use when looking up something in the map should already be a string or string_view, not (only) the pointer to the beginning of the split-out argument.

user673679 user673679 12.2k2 gold badges34 silver badges65 bronze badges · Accepted Answer · 2021-11-14 19:33:15Z

Security / error handling:

I don't think it's a good idea to allow the user to crash the program, or (worse) achieve undefined behavior when running the interpreter. This means we have to check that stack indices are valid before we use them, that we don't try to pop from an empty stack, and that the stack has only one item on it at the end.

We can either check manually, and throw, or use std::vector::at().

The rest of this review is mainly nitpicking.

Unnecessary string copies:

(Since this is tagged performance ̄\_(ツ)_/ ̄)

static T run(const std::string& prog)
{
 std::vector<std::string> arguments;
 std::stringstream ss(prog); // copy of entire input string
 std::string item;
 while (getline(ss, item, ' ')) // copy of every argument
 arguments.push_back(item); // copy of every argument (again)
 return run(arguments);
}

Honestly this is a perfectly normal way of doing it, ~~(削除) but streams are awful and should be avoided (削除ここまで)~~ but we could use a vector of std::string_view for the arguments, and write our own getline function to split the string, and thus avoid making any copies at all. Hopefully someday the standards committee will get around to fixing this bit of the standard library, but until then we have to do it ourselves.

I think it would be reasonable to parse directly into the instructions, instead of creating an intermediate vector. (Though I guess that would require concatenating the command line arguments).

 static std::map<std::string, func1arg> functions1{
 static std::map<std::string, func2arg> functions2{
 static std::map<std::string, funcctrl> functionsc{

We could use std::string_view for the keys here.

The maps could also be const.

 ...
 for (auto arg : args) // unnecessary copy of every arg

It looks like could use auto const& arg here.

 std::ostringstream oss;
 oss << "Error: invalid instruction: " << arg;
 throw std::runtime_error(oss.str());

arg is already a std::string, so we could just throw std::runtime_error("Error: invalid instruction: " + arg);. Even if it wasn't, we could use std::to_string(arg) instead of a~~(削除) n evil (削除ここまで)~~ stringstream.

std::vector<std::string>(argv + 1, argv + argc)

Again, if we used std::vector<std::string_view> we can avoid the copies here.

Modern `if` conditions:

 auto func1 = functions1.find(arg);
 if (func1 != functions1.end())
 {
 instructions.emplace_back(func1->second);
 continue;
 }

Can be written as:

 if (auto f1 = functions1.find(arg); f1 != functions1.end())
 {
 instructions.push_back(f1->second);
 continue;
 }

Which keeps f1 in as small a scope as possible.

Likewise:

 T number;
 auto ret = std::from_chars(arg.data(), arg.data() + arg.size(), number);
 if (ret.ec == std::errc() && ret.ptr == arg.data() + arg.size())
 {
 instructions.emplace_back(number);
 continue;
 }

This is definitely complex enough to factor out into a separate function: std::optional<T> to_number(std::string const& str);. Then we could do:

 if (auto n = to_number(arg); n.has_value())
 {
 instructions.push_back(n.value());
 continue;
 }

Magic numbers vs `std::visit`:

 for (size_t i = 0; i < instructions.size(); i++)
 {
 const auto& inst = instructions[i];
 switch (inst.index())
 {
 case 0:
 stack.emplace_back(std::get<T>(inst));
 break;
 case 1:
 stack.emplace_back(std::get<func1arg>(inst)(pop(stack)));
 break;
 case 2:
 stack.emplace_back(std::get<func2arg>(inst)(pop(stack), pop(stack)));
 break;
 case 3:
 std::get<funcctrl>(inst)(stack, i);
 break;
 }
 }

Using magic number indices like this is a bit... icky.

To use std::visit, we can either declare a visitor struct inside the function:

 struct visitor
 {
 std::size_t& i;
 std::vector<T>& stack;
 void operator()(T t) { stack.push_back(t); }
 void operator()(func1arg f1) { stack.push_back(f1(pop(stack))); }
 void operator()(func2arg f2) { stack.push_back(f2(pop(stack), pop(stack))); }
 void operator()(funcctrl fc) { fc(stack, i); }
 };
 for (size_t i = 0; i < instructions.size(); i++)
 std::visit(visitor{ i, stack }, instructions[i]);

Which is a bit messy, because we only use the index sometimes.

Or we can use the inheritance trick from here with some lambdas:

template<class... Ts> struct overloaded : Ts... { using Ts::operator()...; };
template<class... Ts> overloaded(Ts...) -> overloaded<Ts...>;
...
 for (size_t i = 0; i < instructions.size(); i++)
 {
 std::visit(
 overloaded
 {
 [&] (T t) { stack.push_back(t); },
 [&] (func1arg f1) { stack.push_back(f1(pop(stack))); },
 [&] (func2arg f2) { stack.push_back(f2(pop(stack), pop(stack))); },
 [&] (funcctrl fc) { fc(stack, i); } 
 },
 instructions[i]);
 }

Neither of these methods is perfect, but they're perhaps a little clearer.

Popping:

static_cast<std::size_t>(std::lround(pop(stack)))

There are enough of these around that it would be worth factoring out into a pop_stack_index function.

Magic numbers:

 catch (std::exception err)
 {
 std::cerr << err.what() << std::endl;
 return 1;
 }

We could use EXIT_FAILURE from <cstdlib>.

Thanks for the review and good tips. Do you have any thoughts about wrapping stackprog in a class? I initially tried to do it with stand-alone functions in a stackprog namespace, but made a huge mess of the templates.
I was actually going to say "it's all static functions and no state, so it could be a namespace", but then I saw that it makes the templates easier. You could maybe make a couple of free functions: template<class T> run(...); that call stackprog<T>::run() and then the user wouldn't need to see stackprog at all.
"if we used std::vector<std::string_view> we can avoid the copies" It would still make copies of the pointers and call the equivalent of strlen() for every argument. You could make run() take a std::span<const char *> args, so you can call it like so: run({argv, argv + argc}), and in parse() do a for (std::string_view arg: args) ....
@G.Sliepen The advantage of using string_view instead of a const char* is that you don't have to re-check the length. The argument you use when looking up something in the map should already be a string or string_view, not (only) the pointer to the beginning of the split-out argument.

Stack Exchange Network

Simple stack based interpreter

1 Answer 1

Security / error handling:

Unnecessary string copies:

Modern `if` conditions:

Magic numbers vs `std::visit`:

Popping:

Magic numbers:

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Hot Network Questions

Simple stack based interpreter

1 Answer 1

Security / error handling:

Unnecessary string copies:

Modern if conditions:

Magic numbers vs std::visit:

Popping:

Magic numbers:

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Related

Hot Network Questions

Modern `if` conditions:

Magic numbers vs `std::visit`: