My Python implementation of for Hack Assembly Language. See this question for the Java implementation. Any comment about best practices and performance improvement?
import sys
import os
def usage():
print "Usage: %s namefile.asm" % os.path.basename(sys.argv[0])
def tobin(val, nbits):
return bin((val + (1 << nbits)) % (1 << nbits))[2:]
def main():
#command-line argument
args = sys.argv[1:]
if "-h" in args or "--help" in args or args==[]:
usage()
sys.exit(2)
fw = open(args[0].split('.')[0]+".hack",'w')
#init compTable
compT = {
'0':'0101010', '1':'0111111', '-1':'0111010', 'D':'0001100', 'A':'0110000',
'!D':'0001101', '!A':'0110001', '-D':'0001111', '-A':'0110011','D+1':'0011111',
'A+1':'0110111','D-1':'0001110','D+A':'0000010','D-A':'0010011','A-D':'0000111',
'D&A':'0000000','D|A':'0010101', 'M':'1110000', '!M':'1110001', '-M':'1110011',
'M+1':'1110111','M-1':'1110010','D+M':'1000010','D-M':'1010011','M-D':'1000111',
'D&M':'1000000','D|M':'1010101','A-1':'0110010'
}
#init destTable
destT ={'null':'000','M':'001','D':'010','MD':'011',
'A':'100','AM':'101','AD':'110','AMD':'111'}
#init jumpTable
jumpT ={'null':'000','JGT':'001','JEQ':'010','JGE':'011',
'JLT':'100','JNE':'101','JLE':'110','JMP':'111'}
#init SymbolTable
SymbolTable ={'SP':0,'LCL':1,'ARG':2,'THIS':3,'THAT':4,'SCREEN':16384,'KBD':24576,
'R0':0,'R1':1,'R2':2,'R3':3,'R4':4,'R5':5,'R6':6,'R7':7,
'R8':8,'R9':9,'R10':10,'R11':11,'R12':12,'R13':13,'R14':14,'R15':15}
#first pass
instr_count = 0
with open(args[0]) as f:
for i, l in enumerate(f):
line = l.strip() #strip space
line = line.split('//')[0].strip() #strip comment
if not line:
continue
if line[0]=='(':
symbol = line[1:].split(')')[0]
SymbolTable[symbol] = instr_count #ignoring double symbol error
continue
#ignoring invalid A_COMMAND & C_COMMAND
instr_count += 1
#second pass
instr_count = 0
symbol_counter = 16
with open(args[0]) as f:
for i, l in enumerate(f):
line = l.strip() #strip space
line = line.split('//')[0].strip() #strip comment
if not line:
continue
if line[0]=='(':
continue
#--------------A-COMMAND------------------
if line[0]=='@':
token = line[1:]
if not token.lstrip('-').isdigit():
if token not in SymbolTable:
SymbolTable[token] = symbol_counter
symbol_counter +=1
token = SymbolTable[token]
#print "0{0:015b}".format(token) #ignoring unknown symbol
fw.write("0{0:015b}\n".format(token))
else:
#--- TODO: dealing with negative number
if token[0] == '-':
#print '0'+tobin(int(token),15)
fw.write('0'+tobin(int(token),15)+'\n')
else:
#print "0{0:015b}".format(int(token)) #ignoring unknown symbol
fw.write("0{0:015b}\n".format(int(token)))
instr_count += 1
continue
#--------------C-COMMAND------------------
comp = dest = jump = ""
if '=' in line:
token = line.split('=')
dest = destT[token[0]]
line = token[1]
else:
dest = destT['null']
token = line.split(';')
comp = compT[token[0]]
if ';' in line:
jump = jumpT[token[1]]
else:
jump = jumpT['null']
#print '111'+comp+dest+jump
fw.write('111'+comp+dest+jump+'\n')
instr_count += 1
fw.close()
if __name__ == "__main__":
main()
-
1\$\begingroup\$ Welcome to Code Review! I hope you get some great answers! \$\endgroup\$Phrancis– Phrancis2015年05月19日 20:21:56 +00:00Commented May 19, 2015 at 20:21
2 Answers 2
Working with file handles
The program opens a filehandle for writing early on:
fw = open(args[0].split('.')[0]+".hack",'w')
But then it's not used until much further down.
It would be better to open filehandles right before you really need them,
and use the with open(...) as
syntax,
instead of managing it manually,
to protect yourself from accidentally forgetting to close.
Decompose to smaller functions
The main
functions does too much.
It would be better to split it up to multiple smaller functions with descriptive names.
It would be ideal to decompose to smaller functions with descriptive names that comments would become unnecessary.
The constant dictionaries you define at the beginning of the function make the implementation look a bit noisy. It might be better to move these near the top of the file.
Naming
The variables are quite poorly named. It's quite hard to follow the code this way. Try to come up with better, more descriptive names.
Avoid single-letter variable names,
especially names like l
,
that may look like 1
with some fonts,
and can be especially confusing when used near another variable named i
.
Command line parsing
Instead of import sys
and parsing command line flags yourself,
I suggest to take a look at argparse
.
It's really easy to use, and it's awesome.
Coding style
The posted code has violates PEP8 on many counts. I suggest to review the guidelines and apply carefully
The indentation is odd at a few places. For example the
return
statement intobin
snake_case
is recommended for variable and function names. For example,symbol_table
would be better thanSymbolTable
. Alsoto_bin
better thantobin
. Just to name a few.A more idiomatic way of writing
line[0]=='('
isline.startswith('(')
Prefer Python 3
Judging by the print
statement without parentheses,
it seems you're on Python 2.
Consider switching to Python 3.
-
\$\begingroup\$ I posted the revised code as new answer, @janos \$\endgroup\$inyoot– inyoot2015年05月20日 18:25:12 +00:00Commented May 20, 2015 at 18:25
-
\$\begingroup\$ Let me know if you have other comments/inputs specially about performance improvement! \$\endgroup\$inyoot– inyoot2015年05月22日 13:46:26 +00:00Commented May 22, 2015 at 13:46
Here is my revised code. It passed PEP8 online check. I changed it into python3. I fixed the file handles, parsed the filename using argparse
, used better naming schemes, and changed into more pythonic/idiomatic way of writing. Basically, it just following all the inputs from @janos.
import argparse
# init compTable
compTable = {
'0': '0101010', '1': '0111111', '-1': '0111010', 'D': '0001100',
'A': '0110000', '!D': '0001101', '!A': '0110001', '-D': '0001111',
'-A': '0110011', 'D+1': '0011111', 'A+1': '0110111', 'D-1': '0001110',
'D+A': '0000010', 'D-A': '0010011', 'A-D': '0000111', 'D&A': '0000000',
'D|A': '0010101', 'M': '1110000', '!M': '1110001', '-M': '1110011',
'M+1': '1110111', 'M-1': '1110010', 'D+M': '1000010', 'D-M': '1010011',
'M-D': '1000111', 'D&M': '1000000', 'D|M': '1010101', 'A-1': '0110010'
}
# init destTable
destination_table = {'null': '000', 'M': '001', 'D': '010', 'MD': '011',
'A': '100', 'AM': '101', 'AD': '110', 'AMD': '111'}
# init jumpTable
jumptTable = {'null': '000', 'JGT': '001', 'JEQ': '010', 'JGE': '011',
'JLT': '100', 'JNE': '101', 'JLE': '110', 'JMP': '111'}
def tobin(val, nbits):
return bin((val + (1 << nbits)) % (1 << nbits))[2:]
def build_symbol_table(filename):
# init symbol_table
symbol_table = {
'SP': 0, 'LCL': 1, 'ARG': 2, 'THIS': 3, 'THAT': 4, 'SCREEN': 16384,
'KBD': 24576, 'R0': 0, 'R1': 1, 'R2': 2, 'R3': 3, 'R4': 4, 'R5': 5,
'R6': 6, 'R7': 7, 'R8': 8, 'R9': 9, 'R10': 10, 'R11': 11, 'R12': 12,
'R13': 13, 'R14': 14, 'R15': 15
}
instr_count = 0
with open(filename) as in_textfile:
for line in in_textfile:
line = line.strip() # strip space
line = line.split('//')[0].strip() # strip comment
if not line:
continue
if line.startswith('('):
symbol = line[1:].split(')')[0]
symbol_table[symbol] = instr_count
# ignoring double symbol error
continue
# ignoring invalid A_COMMAND & C_COMMAND
instr_count += 1
return symbol_table
def main():
# command-line argument
parser = argparse.ArgumentParser(description='HACK Assembler.')
parser.add_argument('filename')
args = parser.parse_args()
# first pass
symbol_table = build_symbol_table(args.filename)
# second pass
instr_count = 0
symbol_counter = 16 # after R0-R15
outname = args.filename.split('.')[0]+".hack"
with open(args.filename) as in_textfile, open(outname, 'w') as outfile:
for line in in_textfile:
line = line.strip() # strip space
line = line.split('//')[0].strip() # strip comment
if not line:
continue
if line.startswith('('):
continue
# --------------A-COMMAND------------------
if line.startswith('@'):
token = line[1:]
if not token.lstrip('-').isdigit():
if token not in symbol_table:
symbol_table[token] = symbol_counter
symbol_counter += 1
token = symbol_table[token]
# print("0{0:015b}".format(token))
outfile.write("0{0:015b}\n".format(token))
else:
# dealing with negative number
if token[0] == '-':
# print('0'+tobin(int(token),15))
outfile.write('0'+tobin(int(token), 15)+'\n')
else:
outfile.write("0{0:015b}\n".format(int(token)))
instr_count += 1
continue
# --------------C-COMMAND------------------
comp = dest = jump = ""
if '=' in line:
token = line.split('=')
dest = destination_table[token[0]]
line = token[1]
else:
dest = destination_table['null']
token = line.split(';')
comp = compTable[token[0]]
if ';' in line:
jump = jumptTable[token[1]]
else:
jump = jumptTable['null']
# print('111'+comp+dest+jump)
outfile.write('111'+comp+dest+jump+'\n')
instr_count += 1
if __name__ == "__main__":
main()