I've written a "simple" interpreter for a programming language in Python, called Tellurium (named after the element of the same name). So far, it's working pretty well, but there are some things I'd like to fix/remove.
Tellurium is a tape-based language, so it uses a tape to store data.
import string, sys, random, time, codecs
tape = [0] * 25500
funcs = {}
variables = {}
readingStr = False
readingLoopAmount = False
readingLoopCode = False
readingRand = False
readingRand2 = False
readingFName = False
readingFCode = False
readingName = False
readingVName = False
readingFileName = False
readingVText = False
readingVName2 = False
appendToFront = False
appendToBack = False
loopInf = False
loopRand = False
string = False
isChar = False
fileName = []
vName = []
vText = []
vName2 = []
tempText = []
tempName = []
fName = []
fCode = []
text = []
rand = []
rand2 = []
loopCode = []
loopAmount = []
selected = 0
def prompt():
cmd = input("> ")
return cmd
def read(cmd):
if "!K" in cmd:
cmd = cmd.replace("!K", "1000")
if "!H" in cmd:
cmd = cmd.replace("!H", "100")
commands = len(cmd)
tokens = list(cmd)
for i in range(0, commands):
parse(tokens[i])
def parse(cmd):
# Sorry for all these globals...
global tape
global funcs
global variables
global readingStr
global readingFileName
global readingLoopAmount
global readingLoopCode
global readingRand
global readingRand2
global readingFName
global readingFCode
global readingName
global readingVName
global readingVText
global readingVName2
global appendToFront
global appendToBack
global loopInf
global loopRand
global vName, vText, vName2
global fileName
global string
global isChar
global tempName
global tempText
global fName
global fCode
global text
global rand
global rand2
global loopCode
global loopAmount
global selected
if readingFileName == True:
if cmd == "]":
readingFileName = False
f = open(''.join(fileName), 'r')
code = f.read()
f.close()
read(code)
fileName = []
else:
fileName.append(cmd)
elif readingVName2 == True:
if cmd == ".":
readingVName2 = False
tape[selected] = variables[''.join(vName2)]
vName2 = []
else:
vName2.append(cmd)
elif readingVName == True:
if cmd == "|":
readingVText = True
readingVName = False
else:
vName.append(cmd)
elif readingVText == True:
if cmd == "]":
readingVText = False
name = ''.join(vName)
val = ''.join(vText)
variables[name] = val
vName = []
vText = []
else:
vText.append(cmd)
elif readingName == True:
if cmd == ".":
readingName = False
name = ''.join(tempName)
read(funcs[name])
tempName = []
else:
tempName.append(cmd)
elif readingFName == True:
if cmd == "|":
readingFName = False
readingFCode = True
else:
fName.append(cmd)
elif readingFCode == True:
if cmd == "`":
readingFCode = False
name = ''.join(fName)
code = ''.join(fCode)
funcs[name] = code
fName = []
fCode = []
else:
fCode.append(cmd)
elif readingRand == True:
if cmd == "|":
readingRand = False
else:
rand.append(cmd)
elif readingRand2 == True:
if cmd == "|":
readingRand2 = False
else:
rand2.append(cmd)
elif string == True:
if appendToFront == True:
if cmd == "~":
tape[selected] = str(tape[selected]) + ''.join(tempText)
tempText = []
appendToFront = False
elif cmd == "$":
tape[selected] = str(tape[selected]) + str(tape[selected-1])
appendToFront = False
tempText = []
else:
tempText.append(cmd)
elif appendToBack == True:
if cmd == "~":
tape[selected] = ''.join(tempText) + str(tape[selected])
tempText = []
appendToBack = False
elif cmd == "$":
tape[selected] = str(tape[selected-1]) + str(tape[selected])
appendToFront = False
tempText = []
else:
tempText.append(cmd)
elif cmd == "r":
tape[selected] = tape[selected].reverse()
elif cmd == "u":
tape[selected] = tape[selected].upper()
elif cmd == "l":
tape[selected] = tape[selected].lower()
elif cmd == "a":
appendToFront = True
elif cmd == "b":
appendToBack = True
elif cmd == ".":
string = False
elif readingLoopAmount == True:
if cmd == "|":
readingLoopAmount = False
readingLoopCode = True
elif cmd == "i":
loopInf = True
elif cmd == "r":
loopRand = True
else:
loopAmount.append(cmd)
elif readingLoopCode == True:
if cmd == "]":
readingLoopCode = False
if loopInf == True:
while 1:
read(loopCode)
if loopRand == True:
if rand and rand2 == []:
for i in range(0, random.randint(0, 100)):
read(loopCode)
else:
if rand2 == []:
for i in range(0, random.randint(0, int(''.join(rand)))):
read(loopCode)
elif rand == []:
for i in range(0, random.randint(int(''.join(rand2)), 100)):
read(loopCode)
else:
for i in range(0, random.randint(int(''.join(rand2)), int(''.join(rand)))):
read(loopCode)
else:
for i in range(0, int(''.join(loopAmount))):
read(loopCode)
loopCode = []
loopAmount = []
else:
loopCode.append(cmd)
elif readingStr == True:
if cmd == "~":
readingStr = False
text = ''.join(text).replace("μ", "")
tape[selected] = text
text = []
else:
text.append(cmd)
elif cmd == "+":
tape[selected] += 1
elif cmd == "-":
tape[selected] -= 1
elif cmd == ">":
selected += 1
elif cmd == "<":
selected -= 1
elif cmd == "*":
print(selected)
elif cmd == "^":
print(tape[selected])
elif cmd == "!":
if isinstance(tape[selected], str):
print(tape[selected])
else:
print(chr(tape[selected]))
elif cmd == "%":
tape[selected] = ord(tape[selected])
elif cmd == "#":
tape[selected] = 0
elif cmd == "$":
selected = 0
elif cmd == "/":
tape[selected] += 10
elif cmd == "\\":
tape[selected] -= 10
elif cmd == "{":
selected += 10
elif cmd == "}":
selected -= 10
elif cmd == '"':
tape[selected] += 100
elif cmd == "'":
tape[selected] -= 100
elif cmd == "-":
selected += 100
elif cmd == "_":
selected -= 100
elif cmd == "i":
tape[selected] = input(">> ")
elif cmd == "n":
tape[selected] = int(tape[selected])
elif cmd == "a":
tape[selected] = int(tape[selected]) + int(tape[selected+1])
elif cmd == "s":
tape[selected] = int(tape[selected]) - int(tape[selected+1])
elif cmd == "m":
tape[selected] = int(tape[selected]) * int(tape[selected+1])
elif cmd == "d":
tape[selected] = int(tape[selected]) / int(tape[selected+1])
elif cmd == "(":
readingNum = True
elif cmd == "z":
tape[selected] = tape[selected+1]
elif cmd == "x":
tape[selected] = tape[selected-1]
elif cmd == "μ":
readingStr = True
elif cmd == "[":
readingLoopAmount = True
elif cmd == "p":
return
elif cmd == ".":
exit
elif cmd == "&":
string = True
elif cmd == "→":
if rand != []:
rand = []
readingRand = True
elif cmd == "←":
if rand2 != []:
rand2 = []
readingRand2 = True
elif cmd == "t":
tape[selected] = str(time.ctime())
elif cmd == " ̈":
time.sleep(1)
elif cmd == "r":
tape[selected] = codecs.encode(str(tape[selected]), 'rot_13')
elif cmd == "n":
tape[selected] = int(tape[selected])
elif cmd == "@":
readingFName = True
elif cmd == "=":
readingName = True
elif cmd == "¤":
readingVName = True
elif cmd == ";":
readingVName2 = True
elif cmd == "0":
readingFileName = True
elif cmd == "f":
tape[selected] = float(tape[selected])
while 1:
read(prompt())
Example programs
Hello, world! -
μHello, world!~^Explanation: everything after
μand before~will be read and stored in the tape's currently selected cell.^outputs whatever is in the selected cell, which in the program isHello, world!.Cat program -
i^Explanation:
ireads input and stores it in the selected cell.^outputs whatever is in the cell.Navigating cells -
+>++>+++<<^>^>^Explanation:
+adds one to the selected cell's value.>goes forward one cell in the tape.<goes back, and^outputs whatever is in the selected cell. So, the output is123, separated by newlines.Math (addition) -
+>+<a^Explanation: If you read the above example, you'll know what the
+,>and<commands do. Theacommand is the addition command. It takes the values of the selected cell and the next one up, and adds them together. The result is stored in the selected cell. The other math commands ares(subtraction),m(multiplication) andd(division).Loop -
[5|^]Explanation: The syntax of a for loop in Tellurium is
[times|code]. So, this outputs the selected cell's value 5 times.Clock -
[i|t^ ̈]Explanation:
tstores the current time and date in the selected cell.̈waits one second before continuing the code.imakes a loop run forever. The code will keep printing the current time and date every second.Countdown -
/[10|-^ ̈]Explanation:
/adds ten to the selected cell's value. The loop after that runs the code-^ ̈ten times.-subtracts one from the cell's value.`Functions -
@a|^^^^]Explanation: this program creates a function called
a, which outputs the selected cell's value four times.Calling functions -
=a.Explanation: this calls a function called
a. (The.is needed, otherwise it won't work)Appending to strings -
μHello,&aworld!~.^`Explanation: this sets the value of the selected cell to
Hello,.&starts string manipulation mode. While in string mode, you can use theacommand to append to the end of a string. In this example, it appendsworld!to the end of the selected cell..` exits string manipulation mode.
The reason I posted this here is because the code is messy and quite unreadable. I'm looking for some tips to clean it up, remove the unneeded things, etc. Also, I'd like to remove the big list of variables and global statements, if possible. They're limiting the flexibility of the language. For example, I can't implement conditional statements until I've removed the globals. I've tried, but it hasn't worked.
1 Answer 1
Seeing as it's a long post, I'll just see where I can get, starting now.
First, prompt:
def prompt():
cmd = input("> ")
return cmd
You don't need the intermediate variable. Just write this:
def prompt():
return input("> ")
Also, you only use it once, so why not use input("> ") instead of prompt()?.
Next, read:
def read(cmd):
if "!K" in cmd:
cmd = cmd.replace("!K", "1000")
if "!H" in cmd:
cmd = cmd.replace("!H", "100")
commands = len(cmd)
tokens = list(cmd)
for i in range(0, commands):
parse(tokens[i])
The if checks can be gone, because otherwise .replace is just a no-op. (It might matter for efficiency, perhaps!). That leaves us with
def read(cmd):
cmd = cmd.replace("!K", "1000")
cmd = cmd.replace("!H", "100")
commands = len(cmd)
tokens = list(cmd)
for i in range(0, commands):
parse(tokens[i])
Also, looping over a string gives us a list of characters. Using that, we get
def read(cmd):
if isinstance(cmd, str):
cmd = cmd.replace("!K", "1000")
cmd = cmd.replace("!H", "100")
for token in cmd:
parse(token)
Which reads a bit saner. The isinstance is needed, because cmd is sometimes a list, and that does not have a replace method. (Alternatively, make sure read is always called with a str).
Binary int operations.
Browsing down further, I see the following interesting piece of code:
elif cmd == "a":
tape[selected] = int(tape[selected]) + int(tape[selected+1])
elif cmd == "s":
tape[selected] = int(tape[selected]) - int(tape[selected+1])
elif cmd == "m":
tape[selected] = int(tape[selected]) * int(tape[selected+1])
elif cmd == "d":
tape[selected] = int(tape[selected]) / int(tape[selected+1])
See the repetition? This is easily fixable, add the following to the top of your module:
import operator
INT_BINOPS = {
"a": operator.add,
"s": operator.sub,
"m": operator.mul,
"d": operator.truediv, # Maybe you meant floordiv?
}
and replace the elif change with
elif cmd in INT_BINOPS:
op = INT_BINOPS[cmd]
tape[selected] = op(int(tape[selected]), int(tape[selected + 1]))
in-place unary operations
Next, I moved the cases for /, \, ", ', n, % closer together to bring out the similarity:
elif cmd == "/":
tape[selected] += 10
elif cmd == "\\":
tape[selected] -= 10
elif cmd == '"':
tape[selected] += 100
elif cmd == "'":
tape[selected] -= 100
elif cmd == "n":
tape[selected] = int(tape[selected])
elif cmd == "%":
tape[selected] = ord(tape[selected])
Reading carefully, all have the form
tape[selected] = f(tape[selected])
for proper functions f. Let's apply the same trick. Using lambda because
import functools
INPLACE_UNARYOPS = {
"+": functools.partial(operator.add, 1),
"-": functools.partial(operator.add, -1), # can't use .sub here
"/": functools.partial(operator.add, 10),
"\\": functools.partial(operator.add, -10), # can't use .sub here
'"': functools.partial(operator.add, 100),
"'": functools.partial(operator.add, -100), # can't use .sub here
"n": int,
"%": ord,
}
and then in the elif chain use
elif cmd in INPLACE_UNARYOPS:
op = INPLACE_UNARYOPS[cmd]
tape[selected] = op(tape[selected])
(funnily enough, you actually have "n" twice, I just removed one of them).
Furthermore, I also added "r" and "f" to the dictionary as follows (because I missed those):
"r": lambda v: codecs.encode(str(v), 'rot_13'),
"f": float,
Assignment from nowhere
There are also "t", "i" and "#" which look alike. They get the same treatment as before. At the top:
FUNCS = {
"t": lambda: str(time.ctime()),
"#": lambda: 0,
"i": lambda: input(">> "),
}
And in the if-chain:
elif cmd in FUNCS:
tape[selected] = FUNCS[cmd]()
Uniform position logic
Again, make things simple. Move similar things together:
elif cmd == "$":
selected = 0
elif cmd == ">":
selected += 1
elif cmd == "<":
selected -= 1
elif cmd == "{":
selected += 10
elif cmd == "}":
selected -= 10
elif cmd == "-":
selected += 100
elif cmd == "_":
selected -= 100
You can probably expect the drill:
POSITION_ACTIONS = {
"$": lambda _: 0,
">": functools.partial(operator.add, 1),
"<": functools.partial(operator.add, -1),
"{": functools.partial(operator.add, 10),
"}": functools.partial(operator.add, -10),
"-": functools.partial(operator.add, 100),
"_": functools.partial(operator.add, -100),
}
And the handler:
elif cmd in POSITION_ACTIONS:
selected = POSITION_ACTIONS[cmd](selected)
Make sure it is later than the value-actions, because you re-used "-" (bug?).
Complex logic
Now, the above changes were fairly trivial (but beneficial!). You can probably handle some extra cases as well.
But there's also a bit of complex logic.
I'm not sure if I'm going in the right direction, so bear with me for a while.
First, I'll rename parse to _parse, and define a function parse as follows:
def parse(token):
return _parse(token)
Please convince yourself that this changes nothing.
Next, I'm going to change that again to
parser_stack = [_parse]
def parse(token):
parser_stack[-1](token)
Again, this should not change anything.
And now I'll show you why: we're going to replace the readFileName case. First we add the function
def read_filename(cmd):
global fileName
if cmd == "]":
parser_stack.pop()
f = open(''.join(fileName), 'r')
code = f.read()
f.close()
read(code)
fileName = []
else:
fileName.append(cmd)
(Which is copy-pasted from the if readingFileName case, with some modifications).
Then, we're going to replace
elif cmd == "0":
readingFileName = True
with
elif cmd == "0":
parser_stack.append(read_filename)
and we can remove the if readingFileName: case, because that's now handled by another parser.
I've given some other stuff the same handling.
Continue from here.
import string, sys, random, time, codecs
import functools
import operator
INT_BINOPS = {
"a": operator.add,
"s": operator.sub,
"m": operator.mul,
"d": operator.truediv,
}
INPLACE_UNARYOPS = {
"+": functools.partial(operator.add, 1),
"-": functools.partial(operator.add, -1),
"/": functools.partial(operator.add, 10),
"\\": functools.partial(operator.add, -10), # can't use .sub here
'"': functools.partial(operator.add, 100),
"'": functools.partial(operator.add, -100),
"n": int,
"f": float,
"%": ord,
"r": lambda v: codecs.encode(str(v), 'rot_13'),
}
FUNCS = {
"t": lambda: str(time.ctime()),
"#": lambda: 0,
"i": lambda: input(">> "),
}
POSITION_ACTIONS = {
"$": lambda _: 0,
">": functools.partial(operator.add, 1),
"<": functools.partial(operator.add, -1),
"{": functools.partial(operator.add, 10),
"}": functools.partial(operator.add, -10),
"-": functools.partial(operator.add, 100),
"_": functools.partial(operator.add, -100),
}
tape = [0] * 25500
funcs = {}
variables = {}
readingStr = False
readingLoopAmount = False
readingLoopCode = False
readingRand = False
readingRand2 = False
readingFName = False
readingFCode = False
appendToFront = False
appendToBack = False
loopInf = False
loopRand = False
string = False
isChar = False
fileName = []
tempText = []
tempName = []
fName = []
fCode = []
text = []
rand = []
rand2 = []
loopCode = []
loopAmount = []
selected = 0
def read(cmd):
if isinstance(cmd, str):
cmd = cmd.replace("!K", "1000")
cmd = cmd.replace("!H", "100")
for token in cmd:
parse(token)
def _parse(cmd):
# Sorry for all these globals...
global tape
global funcs
global variables
global readingStr
global readingLoopAmount
global readingLoopCode
global readingRand
global readingRand2
global readingFName
global readingFCode
global appendToFront
global appendToBack
global loopInf
global loopRand
global vName, vText
global fileName
global string
global isChar
global tempName
global tempText
global fName
global fCode
global text
global rand
global rand2
global loopCode
global loopAmount
global selected
if readingFName == True:
if cmd == "|":
readingFName = False
readingFCode = True
else:
fName.append(cmd)
elif readingFCode == True:
if cmd == "`":
readingFCode = False
name = ''.join(fName)
code = ''.join(fCode)
funcs[name] = code
fName = []
fCode = []
else:
fCode.append(cmd)
elif readingRand == True:
if cmd == "|":
readingRand = False
else:
rand.append(cmd)
elif readingRand2 == True:
if cmd == "|":
readingRand2 = False
else:
rand2.append(cmd)
elif string == True:
if appendToFront == True:
if cmd == "~":
tape[selected] = str(tape[selected]) + ''.join(tempText)
tempText = []
appendToFront = False
elif cmd == "$":
tape[selected] = str(tape[selected]) + str(tape[selected-1])
appendToFront = False
tempText = []
else:
tempText.append(cmd)
elif appendToBack == True:
if cmd == "~":
tape[selected] = ''.join(tempText) + str(tape[selected])
tempText = []
appendToBack = False
elif cmd == "$":
tape[selected] = str(tape[selected-1]) + str(tape[selected])
appendToFront = False
tempText = []
else:
tempText.append(cmd)
elif cmd == "r":
tape[selected] = tape[selected].reverse()
elif cmd == "u":
tape[selected] = tape[selected].upper()
elif cmd == "l":
tape[selected] = tape[selected].lower()
elif cmd == "a":
appendToFront = True
elif cmd == "b":
appendToBack = True
elif cmd == ".":
string = False
elif readingLoopAmount == True:
if cmd == "|":
readingLoopAmount = False
readingLoopCode = True
elif cmd == "i":
loopInf = True
elif cmd == "r":
loopRand = True
else:
loopAmount.append(cmd)
elif readingLoopCode == True:
if cmd == "]":
readingLoopCode = False
if loopInf == True:
while 1:
read(loopCode)
if loopRand == True:
if rand and rand2 == []:
for i in range(0, random.randint(0, 100)):
read(loopCode)
else:
if rand2 == []:
for i in range(0, random.randint(0, int(''.join(rand)))):
read(loopCode)
elif rand == []:
for i in range(0, random.randint(int(''.join(rand2)), 100)):
read(loopCode)
else:
for i in range(0, random.randint(int(''.join(rand2)), int(''.join(rand)))):
read(loopCode)
else:
for i in range(0, int(''.join(loopAmount))):
read(loopCode)
loopCode = []
loopAmount = []
else:
loopCode.append(cmd)
elif readingStr == True:
if cmd == "~":
readingStr = False
text = ''.join(text).replace("μ", "")
tape[selected] = text
text = []
else:
text.append(cmd)
elif cmd == "*":
print(selected)
elif cmd == "^":
print(tape[selected])
elif cmd == "!":
if isinstance(tape[selected], str):
print(tape[selected])
else:
print(chr(tape[selected]))
elif cmd in FUNCS:
tape[selected] = FUNCS[cmd]()
elif cmd in INPLACE_UNARYOPS:
op = INPLACE_UNARYOPS[cmd]
tape[selected] = op(tape[selected])
elif cmd in INT_BINOPS:
op = INT_BINOPS[cmd]
tape[selected] = op(int(tape[selected]), int(tape[selected + 1]))
elif cmd in POSITION_ACTIONS:
selected = POSITION_ACTIONS[cmd](selected)
elif cmd == "(":
readingNum = True
elif cmd == "z":
tape[selected] = tape[selected+1]
elif cmd == "x":
tape[selected] = tape[selected-1]
elif cmd == "μ":
readingStr = True
elif cmd == "[":
readingLoopAmount = True
elif cmd == "p":
return
elif cmd == ".":
exit
elif cmd == "&":
string = True
elif cmd == "→":
if rand != []:
rand = []
readingRand = True
elif cmd == "←":
if rand2 != []:
rand2 = []
readingRand2 = True
elif cmd == " ̈":
time.sleep(1)
elif cmd == "@":
readingFName = True
elif cmd == "=":
parser_stack.append(read_name)
elif cmd == "¤":
parser_stack.append(read_vname)
elif cmd == ";":
parser_stack.append(read_vname2)
elif cmd == "0":
parser_stack.append(read_filename)
parser_stack = [_parse]
def read_name(cmd):
global tempName
if cmd == ".":
parser_stack.pop()
name = ''.join(tempName)
read(funcs[name])
tempName = []
else:
tempName.append(cmd)
def read_vtext(cmd):
global vName, vText
if cmd == "]":
parser_stack.pop()
name = ''.join(vName)
val = ''.join(vText)
variables[name] = val
vName = []
vText = []
else:
vText.append(cmd)
def read_vname(cmd):
if cmd == "|":
parser_stack.pop()
parser_stack.append(read_vtext)
else:
vName.append(cmd)
def read_vname2(cmd):
global vName2
if cmd == ".":
parser_stack.pop()
tape[selected] = variables[''.join(vName2)]
vName2 = []
else:
vName2.append(cmd)
def read_filename(cmd):
global fileName
if cmd == "]":
parser_stack.pop()
f = open(''.join(fileName), 'r')
code = f.read()
f.close()
read(code)
fileName = []
else:
fileName.append(cmd)
def parse(token):
return parser_stack[-1](token)
while 1:
read(input("> "))
Conclusion
I hope I have given you some ideas on how to improve the code even further. I hope it makes sense to you.
I have also ignored a lot of stuff for now (PEP8, globals, etc). If you want you can continue on the path I've shown for a while, and when you see nothing left where you have an idea for improvement, feel free to ask another question for the resulting code. Hopefully it'll be split up enough that you can ask a more specific question.
-
\$\begingroup\$ Just don't edit the code in the question (adding usage examples is good though! Thank you!) \$\endgroup\$Sjoerd Job Postmus– Sjoerd Job Postmus2016年05月20日 16:27:37 +00:00Commented May 20, 2016 at 16:27
-
1\$\begingroup\$ @m654: I've updated the review with some more, but now I really need to stop tweaking it, because I also have other responsibilities. \$\endgroup\$Sjoerd Job Postmus– Sjoerd Job Postmus2016年05月20日 19:07:40 +00:00Commented May 20, 2016 at 19:07
-
\$\begingroup\$ Thanks for the tips! The code looks much better now, and I'm pretty sure it works better as well. \$\endgroup\$m654– m6542016年05月21日 07:36:57 +00:00Commented May 21, 2016 at 7:36
-
\$\begingroup\$ I haven't fully tested it. I checked github, it could use automated tests so that you can be more confident in changing the code without breaking functionality. \$\endgroup\$Sjoerd Job Postmus– Sjoerd Job Postmus2016年05月21日 07:45:47 +00:00Commented May 21, 2016 at 7:45