Skip to main content
Code Review

Return to Question

Became Hot Network Question
deleted 15 characters in body
Source Link
Madagascar
  • 10.1k
  • 1
  • 15
  • 51
5 let age = 1; let name = "Monkey"; 
let result = 10 * (20 / 2);
.
├── lexer
│  ├── lexer.cpp
│  └── lexer.hpp
├── LICENSE
├── Makefile
├── README.md
├── repl
│  └── repl.cpp
├── test
│  ├── lexer_test.cpp
│  └── tests.cc
└── token
 ├── token.cpp
 └── token.hpp
4 directories, 109 files
5 let age = 1; let name = "Monkey"; 
let result = 10 * (20 / 2);
.
├── lexer
│  ├── lexer.cpp
│  └── lexer.hpp
├── LICENSE
├── Makefile
├── README.md
├── repl
│  └── repl.cpp
├── test
│  ├── lexer_test.cpp
│  └── tests.cc
└── token
 ├── token.cpp
 └── token.hpp
4 directories, 10 files
let age = 1; let name = "Monkey"; 
let result = 10 * (20 / 2);
.
├── lexer
│  ├── lexer.cpp
│  └── lexer.hpp
├── LICENSE
├── Makefile
├── README.md
├── repl
│  └── repl.cpp
├── test
│  ├── lexer_test.cpp
└── token
 ├── token.cpp
 └── token.hpp
4 directories, 9 files
added 276 characters in body
Source Link
Madagascar
  • 10.1k
  • 1
  • 15
  • 51

Language:

Copy pasting from the book:

Here is how we bind values to names in Monkey:

5 let age = 1; let name = "Monkey"; 
let result = 10 * (20 / 2);

Besides integers, booleans and strings, the Monkey interpreter we’re going to build will also support arrays and hashes. Here’s what binding an array of integers to a name looks like:

let myArray = [1, 2, 3, 4, 5]; 

And here is a hash, where values are associated with keys:

let thorsten = {"name": "Thorsten", "age": 28}; 

Accessing the elements in arrays and hashes is done with index expressions:

myArray[0] // => 1 
thorsten["name"] // => "Thorsten" 

The let statements can also be used to bind functions to names. Here’s a small function that adds two numbers:

let add = fn(a, b) { return a + b; };

But Monkey not only supports return statements. Implicit return values are also possible, which means we can leave out the return if we want to:

let add = fn(a, b) { a + b; }; 

And calling a function is as easy as you’d expect:

add(1, 2); 

A more complex function, such as a fibonacci function that returns the Nth Fibonacci number, might look like this:

let fibonacci = fn(x) { 
 if (x == 0) { 
 0
 } else { 
 if (x == 1) {
 1 
 } else { 
 fibonacci(x - 1) + fibonacci(x - 2);
 }
 }
}; 

Note the recursive calls to fibonacci itself! Monkey also supports a special type of functions, called higher order functions. These are functions that take other functions as arguments. Here is an example:

let twice = fn(f, x) { return f(f(x)); }; 
let addTwo = fn(x) { return x + 2; }; 
twice(addTwo, 2); // => 6
#ifndef LEXER_HPP
#define LEXER_HPP 1
#include <cstddef>
#include <string_view>
#include "../token/token.hpp"
class Lexer {
 std::string_view input;
 std::size_t pos{};
 std::size_t read_pos{};
 char ch;
 void read_char();
 char peek_char(); const;
 bool is_letter(char c); const;
 void skip_whitespace();
 std::string_view read_ident();
 std::string_view read_int();
 std::string_view read_string();
 public:
 Lexer(const std::string_view &input);
 Token next();
};
#endif /* LEXER_HPP */
#include "lexer.hpp"
#include <cctype>
#include <cstddef>
#include <string>
#include <string_view>
void Lexer::read_char()
{
 ch = read_pos >= input.length() ? '0円' : input[read_pos];
 pos = read_pos++;
}
char Lexer::peek_char() const
{
 return read_pos >= input.length() ? '0円' : input[read_pos];
}
bool Lexer::is_letter(char c) const
{
 return c == '_' || std::isalpha(static_cast<unsigned char>(c));
}
void Lexer::skip_whitespace()
{
 for (; std::isspace(static_cast<unsigned char>(ch)); read_char())
 ;
}
std::string_view Lexer::read_ident()
{
 const std::size_t orig_pos{pos};
 for (; is_letter(ch); read_char())
 ;
 return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_int()
{
 const std::size_t orig_pos{pos};
 for (; isdigit(static_cast<unsigned char>(ch)); read_char())
 ;
 return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_string()
{
 /* Monkey doesn't support escape characters.
 * XXX: How to signal EOF? Returning an empty string can not be an error.
 * Raise an exception?
 */
 const std::size_t orig_pos{pos + 1};
 do {
 read_char();
 } while (ch != '"' && ch != '0円');
 return input.substr(orig_pos, pos - orig_pos);
}
Lexer::Lexer(const std::string_view &input) : input(input) { read_char(); }
Token Lexer::next()
{
 skip_whitespace();
 switch (ch) {
 case '0円':
 return Token(Token::Type::Eof, "");
 case '=':
 if (peek_char() == '=') {
 read_char();
 read_char();
 return Token(Token::Type::Eq, "==");
 }
 read_char();
 return Token(Token::Type::Assign, "=");
 case '+':
 read_char();
 return Token(Token::Type::Plus, "+");
 case '-':
 read_char();
 return Token(Token::Type::Minus, "-");
 case '!':
 if (peek_char() == '=') {
 read_char();
 read_char();
 return Token(Token::Type::Not_eq, "!=");
 }
 read_char();
 return Token(Token::Type::Bang, "!");
 case '*':
 read_char();
 return Token(Token::Type::Asterisk, "*");
 case '/':
 read_char();
 return Token(Token::Type::Slash, "/");
 case '<':
 read_char();
 return Token(Token::Type::Lt, "<");
 case '>':
 read_char();
 return Token(Token::Type::Gt, ">");
 case ',':
 read_char();
 return Token(Token::Type::Comma, ",");
 case ';':
 read_char();
 return Token(Token::Type::Semicolon, ";");
 case ':':
 read_char();
 return Token(Token::Type::Colon, ":");
 case '(':
 read_char();
 return Token(Token::Type::Lparen, "(");
 case ')':
 read_char();
 return Token(Token::Type::Rparen, ")");
 case '{':
 read_char();
 return Token(Token::Type::Lbrace, "{");
 case '}':
 read_char();
 return Token(Token::Type::Rbrace, "}");
 case '[':
 read_char();
 return Token(Token::Type::Lbracket, "[");
 case ']':
 read_char();
 return Token(Token::Type::Rbracket, "]");
 case '"': {
 const std::string_view ident{read_string()};
 read_char();
 return Token(Token::Type::String, ident);
 }
 default:
 if (is_letter(ch)) {
 const std::string_view ident{read_ident()};
 return Token(Token::lookup_ident(ident), ident);
 } else if (std::isdigit(static_cast<unsigned char>(ch))) {
 return Token(Token::Type::Int, read_int());
 }
 Token t{Token(Token::Type::Illegal, std::string{1, ch})};
 read_char();
 return t;
 }
}
#ifndef TOKEN_H
#define TOKEN_H 1
#include <string>
#include <string_view>
#include <vector>
/* This solution is vastly superior to any switch case or array based one,
 * because it doesn't duplicate the names, making it easy to change the
 * enumeration.
 */
#define FOREACH_TOKEN(_) \
 _(Illegal) \
 _(Eof) \
 _(Ident) \
 _(Int) \
 _(String) \
 _(Assign) \
 _(Plus) \
 _(Minus) \
 _(Asterisk) \
 _(Slash) \
 _(Bang) \
 _(Lt) \
 _(Gt) \
 _(Eq) \
 _(Not_eq) \
 _(Comma) \
 _(Semicolon) \
 _(Colon) \
 _(Lparen) \
 _(Rparen) \
 _(Lbrace) \
 _(Rbrace) \
 _(Lbracket) \
 _(Rbracket) \
 _(Function) \
 _(Let) \
 _(True) \
 _(False) \
 _(If) \
 _(Else) \
 _(Return) \
 _(Return)
#define GEN_ENUM(ENUM) ENUM,
class Token {
 // FIXME: This shouldn't be a vector. How do we write an array without
 // providing a size?
 static const std::vector<std::string_view> token_strs;
 static int token_strs_count;
 public:
 enum class Type { FOREACH_TOKEN(GEN_ENUM) };
 Token::Type type;
 std::string lit;
 Token(Token::Type type, const std::string_view &lit) : type(type), lit(lit)
 {
 }
 static Token::Type lookup_ident(const std::string_view &ident);
 static std::string_view to_str(Token::Type t);
};
#undef GEN_ENUM
#endif /* TOKEN_H */
#ifndef LEXER_HPP
#define LEXER_HPP 1
#include <cstddef>
#include <string_view>
#include "../token/token.hpp"
class Lexer {
 std::string_view input;
 std::size_t pos{};
 std::size_t read_pos{};
 char ch;
 void read_char();
 char peek_char();
 bool is_letter(char c);
 void skip_whitespace();
 std::string_view read_ident();
 std::string_view read_int();
 std::string_view read_string();
 public:
 Lexer(const std::string_view &input);
 Token next();
};
#endif /* LEXER_HPP */
#include "lexer.hpp"
#include <cctype>
#include <cstddef>
#include <string>
#include <string_view>
void Lexer::read_char()
{
 ch = read_pos >= input.length() ? '0円' : input[read_pos];
 pos = read_pos++;
}
char Lexer::peek_char()
{
 return read_pos >= input.length() ? '0円' : input[read_pos];
}
bool Lexer::is_letter(char c)
{
 return c == '_' || std::isalpha(static_cast<unsigned char>(c));
}
void Lexer::skip_whitespace()
{
 for (; std::isspace(static_cast<unsigned char>(ch)); read_char())
 ;
}
std::string_view Lexer::read_ident()
{
 const std::size_t orig_pos{pos};
 for (; is_letter(ch); read_char())
 ;
 return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_int()
{
 const std::size_t orig_pos{pos};
 for (; isdigit(static_cast<unsigned char>(ch)); read_char())
 ;
 return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_string()
{
 /* Monkey doesn't support escape characters.
 * XXX: How to signal EOF? Returning an empty string can not be an error.
 * Raise an exception?
 */
 const std::size_t orig_pos{pos + 1};
 do {
 read_char();
 } while (ch != '"' && ch != '0円');
 return input.substr(orig_pos, pos - orig_pos);
}
Lexer::Lexer(const std::string_view &input) : input(input) { read_char(); }
Token Lexer::next()
{
 skip_whitespace();
 switch (ch) {
 case '0円':
 return Token(Token::Type::Eof, "");
 case '=':
 if (peek_char() == '=') {
 read_char();
 read_char();
 return Token(Token::Type::Eq, "==");
 }
 read_char();
 return Token(Token::Type::Assign, "=");
 case '+':
 read_char();
 return Token(Token::Type::Plus, "+");
 case '-':
 read_char();
 return Token(Token::Type::Minus, "-");
 case '!':
 if (peek_char() == '=') {
 read_char();
 read_char();
 return Token(Token::Type::Not_eq, "!=");
 }
 read_char();
 return Token(Token::Type::Bang, "!");
 case '*':
 read_char();
 return Token(Token::Type::Asterisk, "*");
 case '/':
 read_char();
 return Token(Token::Type::Slash, "/");
 case '<':
 read_char();
 return Token(Token::Type::Lt, "<");
 case '>':
 read_char();
 return Token(Token::Type::Gt, ">");
 case ',':
 read_char();
 return Token(Token::Type::Comma, ",");
 case ';':
 read_char();
 return Token(Token::Type::Semicolon, ";");
 case ':':
 read_char();
 return Token(Token::Type::Colon, ":");
 case '(':
 read_char();
 return Token(Token::Type::Lparen, "(");
 case ')':
 read_char();
 return Token(Token::Type::Rparen, ")");
 case '{':
 read_char();
 return Token(Token::Type::Lbrace, "{");
 case '}':
 read_char();
 return Token(Token::Type::Rbrace, "}");
 case '[':
 read_char();
 return Token(Token::Type::Lbracket, "[");
 case ']':
 read_char();
 return Token(Token::Type::Rbracket, "]");
 case '"': {
 const std::string_view ident{read_string()};
 read_char();
 return Token(Token::Type::String, ident);
 }
 default:
 if (is_letter(ch)) {
 const std::string_view ident{read_ident()};
 return Token(Token::lookup_ident(ident), ident);
 } else if (std::isdigit(static_cast<unsigned char>(ch))) {
 return Token(Token::Type::Int, read_int());
 }
 Token t{Token(Token::Type::Illegal, std::string{1, ch})};
 read_char();
 return t;
 }
}
#ifndef TOKEN_H
#define TOKEN_H 1
#include <string>
#include <string_view>
#include <vector>
/* This solution is vastly superior to any switch case or array based one,
 * because it doesn't duplicate the names, making it easy to change the
 * enumeration.
 */
#define FOREACH_TOKEN(_) \
 _(Illegal) \
 _(Eof) \
 _(Ident) \
 _(Int) \
 _(String) \
 _(Assign) \
 _(Plus) \
 _(Minus) \
 _(Asterisk) \
 _(Slash) \
 _(Bang) \
 _(Lt) \
 _(Gt) \
 _(Eq) \
 _(Not_eq) \
 _(Comma) \
 _(Semicolon) \
 _(Colon) \
 _(Lparen) \
 _(Rparen) \
 _(Lbrace) \
 _(Rbrace) \
 _(Lbracket) \
 _(Rbracket) \
 _(Function) \
 _(Let) \
 _(True) \
 _(False) \
 _(If) \
 _(Else) \
 _(Return)
#define GEN_ENUM(ENUM) ENUM,
class Token {
 // FIXME: This shouldn't be a vector. How do we write an array without
 // providing a size?
 static const std::vector<std::string_view> token_strs;
 static int token_strs_count;
 public:
 enum class Type { FOREACH_TOKEN(GEN_ENUM) };
 Token::Type type;
 std::string lit;
 Token(Token::Type type, const std::string_view &lit) : type(type), lit(lit)
 {
 }
 static Token::Type lookup_ident(const std::string_view &ident);
 static std::string_view to_str(Token::Type t);
};
#undef GEN_ENUM
#endif /* TOKEN_H */

Language:

Copy pasting from the book:

Here is how we bind values to names in Monkey:

5 let age = 1; let name = "Monkey"; 
let result = 10 * (20 / 2);

Besides integers, booleans and strings, the Monkey interpreter we’re going to build will also support arrays and hashes. Here’s what binding an array of integers to a name looks like:

let myArray = [1, 2, 3, 4, 5]; 

And here is a hash, where values are associated with keys:

let thorsten = {"name": "Thorsten", "age": 28}; 

Accessing the elements in arrays and hashes is done with index expressions:

myArray[0] // => 1 
thorsten["name"] // => "Thorsten" 

The let statements can also be used to bind functions to names. Here’s a small function that adds two numbers:

let add = fn(a, b) { return a + b; };

But Monkey not only supports return statements. Implicit return values are also possible, which means we can leave out the return if we want to:

let add = fn(a, b) { a + b; }; 

And calling a function is as easy as you’d expect:

add(1, 2); 

A more complex function, such as a fibonacci function that returns the Nth Fibonacci number, might look like this:

let fibonacci = fn(x) { 
 if (x == 0) { 
 0
 } else { 
 if (x == 1) {
 1 
 } else { 
 fibonacci(x - 1) + fibonacci(x - 2);
 }
 }
}; 

Note the recursive calls to fibonacci itself! Monkey also supports a special type of functions, called higher order functions. These are functions that take other functions as arguments. Here is an example:

let twice = fn(f, x) { return f(f(x)); }; 
let addTwo = fn(x) { return x + 2; }; 
twice(addTwo, 2); // => 6
#ifndef LEXER_HPP
#define LEXER_HPP 1
#include <cstddef>
#include <string_view>
#include "../token/token.hpp"
class Lexer {
 std::string_view input;
 std::size_t pos{};
 std::size_t read_pos{};
 char ch;
 void read_char();
 char peek_char() const;
 bool is_letter(char c) const;
 void skip_whitespace();
 std::string_view read_ident();
 std::string_view read_int();
 std::string_view read_string();
 public:
 Lexer(const std::string_view &input);
 Token next();
};
#endif /* LEXER_HPP */
#include "lexer.hpp"
#include <cctype>
#include <cstddef>
#include <string>
#include <string_view>
void Lexer::read_char()
{
 ch = read_pos >= input.length() ? '0円' : input[read_pos];
 pos = read_pos++;
}
char Lexer::peek_char() const
{
 return read_pos >= input.length() ? '0円' : input[read_pos];
}
bool Lexer::is_letter(char c) const
{
 return c == '_' || std::isalpha(static_cast<unsigned char>(c));
}
void Lexer::skip_whitespace()
{
 for (; std::isspace(static_cast<unsigned char>(ch)); read_char())
 ;
}
std::string_view Lexer::read_ident()
{
 const std::size_t orig_pos{pos};
 for (; is_letter(ch); read_char())
 ;
 return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_int()
{
 const std::size_t orig_pos{pos};
 for (; isdigit(static_cast<unsigned char>(ch)); read_char())
 ;
 return input.substr(orig_pos, pos - orig_pos);
}
std::string_view Lexer::read_string()
{
 /* Monkey doesn't support escape characters.
 * XXX: How to signal EOF? Returning an empty string can not be an error.
 * Raise an exception?
 */
 const std::size_t orig_pos{pos + 1};
 do {
 read_char();
 } while (ch != '"' && ch != '0円');
 return input.substr(orig_pos, pos - orig_pos);
}
Lexer::Lexer(const std::string_view &input) : input(input) { read_char(); }
Token Lexer::next()
{
 skip_whitespace();
 switch (ch) {
 case '0円':
 return Token(Token::Type::Eof, "");
 case '=':
 if (peek_char() == '=') {
 read_char();
 read_char();
 return Token(Token::Type::Eq, "==");
 }
 read_char();
 return Token(Token::Type::Assign, "=");
 case '+':
 read_char();
 return Token(Token::Type::Plus, "+");
 case '-':
 read_char();
 return Token(Token::Type::Minus, "-");
 case '!':
 if (peek_char() == '=') {
 read_char();
 read_char();
 return Token(Token::Type::Not_eq, "!=");
 }
 read_char();
 return Token(Token::Type::Bang, "!");
 case '*':
 read_char();
 return Token(Token::Type::Asterisk, "*");
 case '/':
 read_char();
 return Token(Token::Type::Slash, "/");
 case '<':
 read_char();
 return Token(Token::Type::Lt, "<");
 case '>':
 read_char();
 return Token(Token::Type::Gt, ">");
 case ',':
 read_char();
 return Token(Token::Type::Comma, ",");
 case ';':
 read_char();
 return Token(Token::Type::Semicolon, ";");
 case ':':
 read_char();
 return Token(Token::Type::Colon, ":");
 case '(':
 read_char();
 return Token(Token::Type::Lparen, "(");
 case ')':
 read_char();
 return Token(Token::Type::Rparen, ")");
 case '{':
 read_char();
 return Token(Token::Type::Lbrace, "{");
 case '}':
 read_char();
 return Token(Token::Type::Rbrace, "}");
 case '[':
 read_char();
 return Token(Token::Type::Lbracket, "[");
 case ']':
 read_char();
 return Token(Token::Type::Rbracket, "]");
 case '"': {
 const std::string_view ident{read_string()};
 read_char();
 return Token(Token::Type::String, ident);
 }
 default:
 if (is_letter(ch)) {
 const std::string_view ident{read_ident()};
 return Token(Token::lookup_ident(ident), ident);
 } else if (std::isdigit(static_cast<unsigned char>(ch))) {
 return Token(Token::Type::Int, read_int());
 }
 Token t{Token(Token::Type::Illegal, std::string{1, ch})};
 read_char();
 return t;
 }
}
#ifndef TOKEN_H
#define TOKEN_H 1
#include <string>
#include <string_view>
#include <vector>
/* This solution is vastly superior to any switch case or array based one,
 * because it doesn't duplicate the names, making it easy to change the
 * enumeration.
 */
#define FOREACH_TOKEN(_) \
 _(Illegal) \
 _(Eof) \
 _(Ident) \
 _(Int) \
 _(String) \
 _(Assign) \
 _(Plus) \
 _(Minus) \
 _(Asterisk) \
 _(Slash) \
 _(Bang) \
 _(Lt) \
 _(Gt) \
 _(Eq) \
 _(Not_eq) \
 _(Comma) \
 _(Semicolon) \
 _(Colon) \
 _(Lparen) \
 _(Rparen) \
 _(Lbrace) \
 _(Rbrace) \
 _(Lbracket) \
 _(Rbracket) \
 _(Function) \
 _(Let) \
 _(True) \
 _(False) \
 _(If) \
 _(Else) \
 _(Return) \
#define GEN_ENUM(ENUM) ENUM,
class Token {
 // FIXME: This shouldn't be a vector. How do we write an array without
 // providing a size?
 static const std::vector<std::string_view> token_strs;
 static int token_strs_count;
 public:
 enum class Type { FOREACH_TOKEN(GEN_ENUM) };
 Token::Type type;
 std::string lit;
 Token(Token::Type type, const std::string_view &lit) : type(type), lit(lit)
 {
 }
 static Token::Type lookup_ident(const std::string_view &ident);
 static std::string_view to_str(Token::Type t);
};
#undef GEN_ENUM
#endif /* TOKEN_H */
added 142 characters in body
Source Link
Madagascar
  • 10.1k
  • 1
  • 15
  • 51

Side(Side-note: I am completely new to C++ and OOP, so judge accordingly and feel free to critique, but refrain from suggesting grotesque, highly-obfuscated, and overly-complicated templated and range-based code snippets that would make me run for the hills.)

Side-note: I am completely new to C++ and OOP, so judge accordingly.

(Side-note: I am completely new to C++ and OOP, so judge accordingly and feel free to critique, but refrain from suggesting grotesque, highly-obfuscated, and overly-complicated templated and range-based code snippets that would make me run for the hills.)

Source Link
Madagascar
  • 10.1k
  • 1
  • 15
  • 51
Loading
lang-cpp

AltStyle によって変換されたページ (->オリジナル) /