5 let age = 1; let name = "Monkey";
let result = 10 * (20 / 2);
.
├── lexer
│ ├── lexer.cpp
│ └── lexer.hpp
├── LICENSE
├── Makefile
├── README.md
├── repl
│ └── repl.cpp
├── test
│ ├── lexer_test.cpp
│ └── tests.cc
└── token
├── token.cpp
└── token.hpp
4 directories, 109 files
5 let age = 1; let name = "Monkey";
let result = 10 * (20 / 2);
.
├── lexer
│ ├── lexer.cpp
│ └── lexer.hpp
├── LICENSE
├── Makefile
├── README.md
├── repl
│ └── repl.cpp
├── test
│ ├── lexer_test.cpp
│ └── tests.cc
└── token
├── token.cpp
└── token.hpp
4 directories, 10 files
let age = 1; let name = "Monkey";
let result = 10 * (20 / 2);
.
├── lexer
│ ├── lexer.cpp
│ └── lexer.hpp
├── LICENSE
├── Makefile
├── README.md
├── repl
│ └── repl.cpp
├── test
│ ├── lexer_test.cpp
└── token
├── token.cpp
└── token.hpp
4 directories, 9 files
Language:
Copy pasting from the book:
Here is how we bind values to names in Monkey:
5 let age = 1; let name = "Monkey";
let result = 10 * (20 / 2);
Besides integers, booleans and strings, the Monkey interpreter we’re going to build will also support arrays and hashes. Here’s what binding an array of integers to a name looks like:
let myArray = [1, 2, 3, 4, 5];
And here is a hash, where values are associated with keys:
let thorsten = {"name": "Thorsten", "age": 28};
Accessing the elements in arrays and hashes is done with index expressions:
myArray[0] // => 1
thorsten["name"] // => "Thorsten"
The let statements can also be used to bind functions to names. Here’s a small function that adds two numbers:
let add = fn(a, b) { return a + b; };
But Monkey not only supports return statements. Implicit return values are also possible, which means we can leave out the return if we want to:
let add = fn(a, b) { a + b; };
And calling a function is as easy as you’d expect:
add(1, 2);
A more complex function, such as a fibonacci function that returns the Nth Fibonacci number, might look like this:
let fibonacci = fn(x) {
if (x == 0) {
0
} else {
if (x == 1) {
1
} else {
fibonacci(x - 1) + fibonacci(x - 2);
}
}
};
Note the recursive calls to fibonacci itself! Monkey also supports a special type of functions, called higher order functions. These are functions that take other functions as arguments. Here is an example:
let twice = fn(f, x) { return f(f(x)); };
let addTwo = fn(x) { return x + 2; };
twice(addTwo, 2); // => 6
#ifndef LEXER_HPP
#define LEXER_HPP 1
#include <cstddef>
#include <string_view>
#include "../token/token.hpp"
class Lexer {
std::string_view input;
std::size_t pos{};
std::size_t read_pos{};
char ch;
void read_char();
char peek_char(); const;
bool is_letter(char c); const;
void skip_whitespace();
std::string_view read_ident();
std::string_view read_int();
std::string_view read_string();
public:
Lexer(const std::string_view &input);
Token next();
};
#endif /* LEXER_HPP */
#include "lexer.hpp"
#include <cctype>
#include <cstddef>
#include <string>
#include <string_view>
void Lexer::read_char()
{
ch = read_pos >= input.length() ? '0円' : input[read_pos];
pos = read_pos++;
}
char Lexer::peek_char() const
{
return read_pos >= input.length() ? '0円' : input[read_pos];
}
bool Lexer::is_letter(char c) const
{
return c == '_' || std::isalpha(static_cast<unsigned char>(c));
}
void Lexer::skip_whitespace()
{
for (; std::isspace(static_cast<unsigned char>(ch)); read_char())
;
}
std::string_view Lexer::read_ident()
{
const std::size_t orig_pos{pos};
for (; is_letter(ch); read_char())
;
return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_int()
{
const std::size_t orig_pos{pos};
for (; isdigit(static_cast<unsigned char>(ch)); read_char())
;
return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_string()
{
/* Monkey doesn't support escape characters.
* XXX: How to signal EOF? Returning an empty string can not be an error.
* Raise an exception?
*/
const std::size_t orig_pos{pos + 1};
do {
read_char();
} while (ch != '"' && ch != '0円');
return input.substr(orig_pos, pos - orig_pos);
}
Lexer::Lexer(const std::string_view &input) : input(input) { read_char(); }
Token Lexer::next()
{
skip_whitespace();
switch (ch) {
case '0円':
return Token(Token::Type::Eof, "");
case '=':
if (peek_char() == '=') {
read_char();
read_char();
return Token(Token::Type::Eq, "==");
}
read_char();
return Token(Token::Type::Assign, "=");
case '+':
read_char();
return Token(Token::Type::Plus, "+");
case '-':
read_char();
return Token(Token::Type::Minus, "-");
case '!':
if (peek_char() == '=') {
read_char();
read_char();
return Token(Token::Type::Not_eq, "!=");
}
read_char();
return Token(Token::Type::Bang, "!");
case '*':
read_char();
return Token(Token::Type::Asterisk, "*");
case '/':
read_char();
return Token(Token::Type::Slash, "/");
case '<':
read_char();
return Token(Token::Type::Lt, "<");
case '>':
read_char();
return Token(Token::Type::Gt, ">");
case ',':
read_char();
return Token(Token::Type::Comma, ",");
case ';':
read_char();
return Token(Token::Type::Semicolon, ";");
case ':':
read_char();
return Token(Token::Type::Colon, ":");
case '(':
read_char();
return Token(Token::Type::Lparen, "(");
case ')':
read_char();
return Token(Token::Type::Rparen, ")");
case '{':
read_char();
return Token(Token::Type::Lbrace, "{");
case '}':
read_char();
return Token(Token::Type::Rbrace, "}");
case '[':
read_char();
return Token(Token::Type::Lbracket, "[");
case ']':
read_char();
return Token(Token::Type::Rbracket, "]");
case '"': {
const std::string_view ident{read_string()};
read_char();
return Token(Token::Type::String, ident);
}
default:
if (is_letter(ch)) {
const std::string_view ident{read_ident()};
return Token(Token::lookup_ident(ident), ident);
} else if (std::isdigit(static_cast<unsigned char>(ch))) {
return Token(Token::Type::Int, read_int());
}
Token t{Token(Token::Type::Illegal, std::string{1, ch})};
read_char();
return t;
}
}
#ifndef TOKEN_H
#define TOKEN_H 1
#include <string>
#include <string_view>
#include <vector>
/* This solution is vastly superior to any switch case or array based one,
* because it doesn't duplicate the names, making it easy to change the
* enumeration.
*/
#define FOREACH_TOKEN(_) \
_(Illegal) \
_(Eof) \
_(Ident) \
_(Int) \
_(String) \
_(Assign) \
_(Plus) \
_(Minus) \
_(Asterisk) \
_(Slash) \
_(Bang) \
_(Lt) \
_(Gt) \
_(Eq) \
_(Not_eq) \
_(Comma) \
_(Semicolon) \
_(Colon) \
_(Lparen) \
_(Rparen) \
_(Lbrace) \
_(Rbrace) \
_(Lbracket) \
_(Rbracket) \
_(Function) \
_(Let) \
_(True) \
_(False) \
_(If) \
_(Else) \
_(Return) \
_(Return)
#define GEN_ENUM(ENUM) ENUM,
class Token {
// FIXME: This shouldn't be a vector. How do we write an array without
// providing a size?
static const std::vector<std::string_view> token_strs;
static int token_strs_count;
public:
enum class Type { FOREACH_TOKEN(GEN_ENUM) };
Token::Type type;
std::string lit;
Token(Token::Type type, const std::string_view &lit) : type(type), lit(lit)
{
}
static Token::Type lookup_ident(const std::string_view &ident);
static std::string_view to_str(Token::Type t);
};
#undef GEN_ENUM
#endif /* TOKEN_H */
#ifndef LEXER_HPP
#define LEXER_HPP 1
#include <cstddef>
#include <string_view>
#include "../token/token.hpp"
class Lexer {
std::string_view input;
std::size_t pos{};
std::size_t read_pos{};
char ch;
void read_char();
char peek_char();
bool is_letter(char c);
void skip_whitespace();
std::string_view read_ident();
std::string_view read_int();
std::string_view read_string();
public:
Lexer(const std::string_view &input);
Token next();
};
#endif /* LEXER_HPP */
#include "lexer.hpp"
#include <cctype>
#include <cstddef>
#include <string>
#include <string_view>
void Lexer::read_char()
{
ch = read_pos >= input.length() ? '0円' : input[read_pos];
pos = read_pos++;
}
char Lexer::peek_char()
{
return read_pos >= input.length() ? '0円' : input[read_pos];
}
bool Lexer::is_letter(char c)
{
return c == '_' || std::isalpha(static_cast<unsigned char>(c));
}
void Lexer::skip_whitespace()
{
for (; std::isspace(static_cast<unsigned char>(ch)); read_char())
;
}
std::string_view Lexer::read_ident()
{
const std::size_t orig_pos{pos};
for (; is_letter(ch); read_char())
;
return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_int()
{
const std::size_t orig_pos{pos};
for (; isdigit(static_cast<unsigned char>(ch)); read_char())
;
return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_string()
{
/* Monkey doesn't support escape characters.
* XXX: How to signal EOF? Returning an empty string can not be an error.
* Raise an exception?
*/
const std::size_t orig_pos{pos + 1};
do {
read_char();
} while (ch != '"' && ch != '0円');
return input.substr(orig_pos, pos - orig_pos);
}
Lexer::Lexer(const std::string_view &input) : input(input) { read_char(); }
Token Lexer::next()
{
skip_whitespace();
switch (ch) {
case '0円':
return Token(Token::Type::Eof, "");
case '=':
if (peek_char() == '=') {
read_char();
read_char();
return Token(Token::Type::Eq, "==");
}
read_char();
return Token(Token::Type::Assign, "=");
case '+':
read_char();
return Token(Token::Type::Plus, "+");
case '-':
read_char();
return Token(Token::Type::Minus, "-");
case '!':
if (peek_char() == '=') {
read_char();
read_char();
return Token(Token::Type::Not_eq, "!=");
}
read_char();
return Token(Token::Type::Bang, "!");
case '*':
read_char();
return Token(Token::Type::Asterisk, "*");
case '/':
read_char();
return Token(Token::Type::Slash, "/");
case '<':
read_char();
return Token(Token::Type::Lt, "<");
case '>':
read_char();
return Token(Token::Type::Gt, ">");
case ',':
read_char();
return Token(Token::Type::Comma, ",");
case ';':
read_char();
return Token(Token::Type::Semicolon, ";");
case ':':
read_char();
return Token(Token::Type::Colon, ":");
case '(':
read_char();
return Token(Token::Type::Lparen, "(");
case ')':
read_char();
return Token(Token::Type::Rparen, ")");
case '{':
read_char();
return Token(Token::Type::Lbrace, "{");
case '}':
read_char();
return Token(Token::Type::Rbrace, "}");
case '[':
read_char();
return Token(Token::Type::Lbracket, "[");
case ']':
read_char();
return Token(Token::Type::Rbracket, "]");
case '"': {
const std::string_view ident{read_string()};
read_char();
return Token(Token::Type::String, ident);
}
default:
if (is_letter(ch)) {
const std::string_view ident{read_ident()};
return Token(Token::lookup_ident(ident), ident);
} else if (std::isdigit(static_cast<unsigned char>(ch))) {
return Token(Token::Type::Int, read_int());
}
Token t{Token(Token::Type::Illegal, std::string{1, ch})};
read_char();
return t;
}
}
#ifndef TOKEN_H
#define TOKEN_H 1
#include <string>
#include <string_view>
#include <vector>
/* This solution is vastly superior to any switch case or array based one,
* because it doesn't duplicate the names, making it easy to change the
* enumeration.
*/
#define FOREACH_TOKEN(_) \
_(Illegal) \
_(Eof) \
_(Ident) \
_(Int) \
_(String) \
_(Assign) \
_(Plus) \
_(Minus) \
_(Asterisk) \
_(Slash) \
_(Bang) \
_(Lt) \
_(Gt) \
_(Eq) \
_(Not_eq) \
_(Comma) \
_(Semicolon) \
_(Colon) \
_(Lparen) \
_(Rparen) \
_(Lbrace) \
_(Rbrace) \
_(Lbracket) \
_(Rbracket) \
_(Function) \
_(Let) \
_(True) \
_(False) \
_(If) \
_(Else) \
_(Return)
#define GEN_ENUM(ENUM) ENUM,
class Token {
// FIXME: This shouldn't be a vector. How do we write an array without
// providing a size?
static const std::vector<std::string_view> token_strs;
static int token_strs_count;
public:
enum class Type { FOREACH_TOKEN(GEN_ENUM) };
Token::Type type;
std::string lit;
Token(Token::Type type, const std::string_view &lit) : type(type), lit(lit)
{
}
static Token::Type lookup_ident(const std::string_view &ident);
static std::string_view to_str(Token::Type t);
};
#undef GEN_ENUM
#endif /* TOKEN_H */
Language:
Copy pasting from the book:
Here is how we bind values to names in Monkey:
5 let age = 1; let name = "Monkey";
let result = 10 * (20 / 2);
Besides integers, booleans and strings, the Monkey interpreter we’re going to build will also support arrays and hashes. Here’s what binding an array of integers to a name looks like:
let myArray = [1, 2, 3, 4, 5];
And here is a hash, where values are associated with keys:
let thorsten = {"name": "Thorsten", "age": 28};
Accessing the elements in arrays and hashes is done with index expressions:
myArray[0] // => 1
thorsten["name"] // => "Thorsten"
The let statements can also be used to bind functions to names. Here’s a small function that adds two numbers:
let add = fn(a, b) { return a + b; };
But Monkey not only supports return statements. Implicit return values are also possible, which means we can leave out the return if we want to:
let add = fn(a, b) { a + b; };
And calling a function is as easy as you’d expect:
add(1, 2);
A more complex function, such as a fibonacci function that returns the Nth Fibonacci number, might look like this:
let fibonacci = fn(x) {
if (x == 0) {
0
} else {
if (x == 1) {
1
} else {
fibonacci(x - 1) + fibonacci(x - 2);
}
}
};
Note the recursive calls to fibonacci itself! Monkey also supports a special type of functions, called higher order functions. These are functions that take other functions as arguments. Here is an example:
let twice = fn(f, x) { return f(f(x)); };
let addTwo = fn(x) { return x + 2; };
twice(addTwo, 2); // => 6
#ifndef LEXER_HPP
#define LEXER_HPP 1
#include <cstddef>
#include <string_view>
#include "../token/token.hpp"
class Lexer {
std::string_view input;
std::size_t pos{};
std::size_t read_pos{};
char ch;
void read_char();
char peek_char() const;
bool is_letter(char c) const;
void skip_whitespace();
std::string_view read_ident();
std::string_view read_int();
std::string_view read_string();
public:
Lexer(const std::string_view &input);
Token next();
};
#endif /* LEXER_HPP */
#include "lexer.hpp"
#include <cctype>
#include <cstddef>
#include <string>
#include <string_view>
void Lexer::read_char()
{
ch = read_pos >= input.length() ? '0円' : input[read_pos];
pos = read_pos++;
}
char Lexer::peek_char() const
{
return read_pos >= input.length() ? '0円' : input[read_pos];
}
bool Lexer::is_letter(char c) const
{
return c == '_' || std::isalpha(static_cast<unsigned char>(c));
}
void Lexer::skip_whitespace()
{
for (; std::isspace(static_cast<unsigned char>(ch)); read_char())
;
}
std::string_view Lexer::read_ident()
{
const std::size_t orig_pos{pos};
for (; is_letter(ch); read_char())
;
return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_int()
{
const std::size_t orig_pos{pos};
for (; isdigit(static_cast<unsigned char>(ch)); read_char())
;
return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_string()
{
/* Monkey doesn't support escape characters.
* XXX: How to signal EOF? Returning an empty string can not be an error.
* Raise an exception?
*/
const std::size_t orig_pos{pos + 1};
do {
read_char();
} while (ch != '"' && ch != '0円');
return input.substr(orig_pos, pos - orig_pos);
}
Lexer::Lexer(const std::string_view &input) : input(input) { read_char(); }
Token Lexer::next()
{
skip_whitespace();
switch (ch) {
case '0円':
return Token(Token::Type::Eof, "");
case '=':
if (peek_char() == '=') {
read_char();
read_char();
return Token(Token::Type::Eq, "==");
}
read_char();
return Token(Token::Type::Assign, "=");
case '+':
read_char();
return Token(Token::Type::Plus, "+");
case '-':
read_char();
return Token(Token::Type::Minus, "-");
case '!':
if (peek_char() == '=') {
read_char();
read_char();
return Token(Token::Type::Not_eq, "!=");
}
read_char();
return Token(Token::Type::Bang, "!");
case '*':
read_char();
return Token(Token::Type::Asterisk, "*");
case '/':
read_char();
return Token(Token::Type::Slash, "/");
case '<':
read_char();
return Token(Token::Type::Lt, "<");
case '>':
read_char();
return Token(Token::Type::Gt, ">");
case ',':
read_char();
return Token(Token::Type::Comma, ",");
case ';':
read_char();
return Token(Token::Type::Semicolon, ";");
case ':':
read_char();
return Token(Token::Type::Colon, ":");
case '(':
read_char();
return Token(Token::Type::Lparen, "(");
case ')':
read_char();
return Token(Token::Type::Rparen, ")");
case '{':
read_char();
return Token(Token::Type::Lbrace, "{");
case '}':
read_char();
return Token(Token::Type::Rbrace, "}");
case '[':
read_char();
return Token(Token::Type::Lbracket, "[");
case ']':
read_char();
return Token(Token::Type::Rbracket, "]");
case '"': {
const std::string_view ident{read_string()};
read_char();
return Token(Token::Type::String, ident);
}
default:
if (is_letter(ch)) {
const std::string_view ident{read_ident()};
return Token(Token::lookup_ident(ident), ident);
} else if (std::isdigit(static_cast<unsigned char>(ch))) {
return Token(Token::Type::Int, read_int());
}
Token t{Token(Token::Type::Illegal, std::string{1, ch})};
read_char();
return t;
}
}
#ifndef TOKEN_H
#define TOKEN_H 1
#include <string>
#include <string_view>
#include <vector>
/* This solution is vastly superior to any switch case or array based one,
* because it doesn't duplicate the names, making it easy to change the
* enumeration.
*/
#define FOREACH_TOKEN(_) \
_(Illegal) \
_(Eof) \
_(Ident) \
_(Int) \
_(String) \
_(Assign) \
_(Plus) \
_(Minus) \
_(Asterisk) \
_(Slash) \
_(Bang) \
_(Lt) \
_(Gt) \
_(Eq) \
_(Not_eq) \
_(Comma) \
_(Semicolon) \
_(Colon) \
_(Lparen) \
_(Rparen) \
_(Lbrace) \
_(Rbrace) \
_(Lbracket) \
_(Rbracket) \
_(Function) \
_(Let) \
_(True) \
_(False) \
_(If) \
_(Else) \
_(Return) \
#define GEN_ENUM(ENUM) ENUM,
class Token {
// FIXME: This shouldn't be a vector. How do we write an array without
// providing a size?
static const std::vector<std::string_view> token_strs;
static int token_strs_count;
public:
enum class Type { FOREACH_TOKEN(GEN_ENUM) };
Token::Type type;
std::string lit;
Token(Token::Type type, const std::string_view &lit) : type(type), lit(lit)
{
}
static Token::Type lookup_ident(const std::string_view &ident);
static std::string_view to_str(Token::Type t);
};
#undef GEN_ENUM
#endif /* TOKEN_H */
Side(Side-note: I am completely new to C++ and OOP, so judge accordingly and feel free to critique, but refrain from suggesting grotesque, highly-obfuscated, and overly-complicated templated and range-based code snippets that would make me run for the hills.)
Side-note: I am completely new to C++ and OOP, so judge accordingly.
(Side-note: I am completely new to C++ and OOP, so judge accordingly and feel free to critique, but refrain from suggesting grotesque, highly-obfuscated, and overly-complicated templated and range-based code snippets that would make me run for the hills.)