Starting from this answer I thought I would show a fully worked example of how to create a data-driven version of the assembler for the Hack assembly language. As I noted in that answer, having things data driven also provides the opportunity to create a disassembler using the same data structure. This code implements both. I had intended to create some test code in C++ to exercise parts of this, but ran out of time to do so, so there may be some remaining bugs. This is the reason that some of the functions are declared as global and begin with an underscore. Here's a sample input file:
source.hack
@16
M=1
@17
M=0
@16
D=M
@0
D=D-M
@18
D;JGT
@16
D=M
@17
M=D+M
@16
M=M+1
@4
0;JMP
@17
D=M
Example of use:
hack input.hack > input.bin hack input.bin d > reconstructed.hack
Note that to specify either operation (assemble/disassemble) we need to supply a file name. To specify disassembly, an additional command line argument is given. The contents of it are not used; just the presence of an additional argument is what triggers disassembly.
It also makes no effort to diagnose or even detect malformed or invalid input lines.
hack.asm
BUFFSIZE equ 16384
; some character constants
NEWLINE equ 10
SPACE equ 32
TAB equ 9
; some syscall constants
SYSREAD equ 0
SYSWRITE equ 1
SYSOPEN equ 2
; default file handles
STDIN equ 0
STDOUT equ 1
STDERR equ 2
; exit takes an exit error code
%macro exit 1
mov edi, %1
mov eax, 60
syscall
%endmacro
; openfile takes an ASCIIZ filename
%macro openfile 1
mov rdi, %1
xor esi, esi ; read only
xor edx, edx ; no create
mov eax, SYSOPEN
syscall
%endmacro
; readfile takes a file descriptor, buffer pointer, and buffer size
%macro readfile 3
mov rdi, %1
mov rsi, %2
mov rdx, %3
mov eax, SYSREAD
syscall
%endmacro
; print takes the file handle, a pointer to the string, and a length
%macro print 3
mov rdi, %1 ; print to passed file handle
mov rsi, %2 ; pointer to buffer
mov rdx, %3 ; length of buffer
mov eax, SYSWRITE
syscall
%endmacro
; takes mask, and pointer to table as arguments
; ENTRY:
; rax : current opcode to be decoded
; rdi : pointer to current output
; EXIT:
; rdi : updated current output pointer
; TRASHED:
; rbx
; rcx
; rsi
; r10
%macro decodeOpcodePart 2
mov eax, ebx
and eax, %1
mov r10, %2
call _reverseTableLookup
jc %%nextpart
xor rcx,rcx
mov cx, word [r10 + Operation.strlen]
lea esi, [r10 + Operation.string]
rep movsb
%%nextpart:
%endmacro
; takes mask, and pointer to table as arguments
; ENTRY:
; rax : current partially encoded opcode
; EXIT:
; rax : updated opcode
; TRASHED:
; rdi
%macro encodeOpcodePart 1
mov edi, %1
call _tableLookup
or ax, word[edi + Operation.value]
%endmacro
section .bss
buffer resb BUFFSIZE
outbuf resw BUFFSIZE/2
line resb 10
section .data
err_no_args db "Error: no input filename given on command line", 10
err_no_args_len equ $ - err_no_args
err_open db "Error: could not open file",10
err_open_len equ $ - err_open
err_read db "Error: could not read from file", 10
err_read_len equ $ - err_read
err_zero db "Error: read zero words from file", 10
err_zero_len equ $ - err_zero
err_write db "Error: could not write to output",10
err_write_len equ $ - err_write
section .text
global _start
global _tableLookup
global _stringToNumber
; look up a string token in a table and return the corresponding value
;
; ENTRY:
; esi : points to strz token
; edi : points to Operation table
; EXIT:
; cy : set if not found in table
; edi : points to matching entry if found
; esi : points to one char after matched token if found
; TRASHED:
; rbx
_tableLookup:
mov rbx, rsi ; save original pointer to token
.looptop:
movzx ecx, word [edi + Operation.strlen]
push rdi
repe cmpsb
pop rdi
je .found
add edi, Operation_size
cmp word [edi + Operation.strlen],0
mov rsi,rbx
jne .looptop
stc
.found:
ret
; look up a a value and construct the corresponding string
;
; ENTRY:
; eax : binary value to search for
; r10 : points to Operation table
; EXIT:
; cy : set if not found in table
; r10 : points to matching entry if found
; TRASHED:
_reverseTableLookup:
.looptop:
cmp ax, word [r10 + Operation.value]
je .found
add r10, Operation_size
cmp word [r10 + Operation.strlen],0
jne .looptop
stc
.found:
ret
; given a pointer to a decimal number string, convert to the value in eax
; ENTRY:
; esi : points to strz decimal number string
; EXIT:
; eax : contains converted value
; esi : points to one past ending character
; TRASHED:
; ebx, ecx
_stringToNumber:
push rbx
push rcx
xor eax, eax
xor ebx, ebx
xor ecx, ecx
inc ecx ; default multiplier is +1
lodsb
cmp al, '-' ; is it a leading - sign?
jnz .numeric
dec ecx
dec ecx
.numeric:
sub al, '0'
jb .done
cmp al, 10
jnb .done
imul ebx, 10
add ebx, eax
lodsb
jmp .numeric
.done:
imul ebx, ecx
mov eax, ebx
pop rcx
pop rbx
ret
; given a 16-bit number in ax, convert to string in line
; ENTRY:
; rax : number to convert (high 48 bits must be zero)
; rdi : points to destination buffer
;
; EXIT:
; rdi : points to one past end of written string
; TRASHES:
; rbx
;
_numberToString:
push rsi
push rcx
mov rsi, rdi
mov bx,10
add rdi,5
.loopy:
dec rdi
xor edx,edx
div bx
add dl, '0'
mov byte [rdi],dl
test rax,rax
jnz .loopy
; compute 5 - (rdi - rsi) = rsi - rdi + 5 = rsi + 5 - rdi
lea rcx, [rsi + 5]
sub rcx, rdi
xchg rsi, rdi
rep movsb
pop rcx
pop rsi
ret
; copy just one assembly line from source to dest buffer, omitting whitespace
; ENTRY:
; esi : pointer to source
; edi : pointer to destination
; eax : number of bytes in source
; EXIT:
; esi : points to start of next line in source (if any)
; edi : points one past end of copied line
; eax : bytes remaining in source
; TRASHED:
; none
getline:
push rcx
mov ecx, eax
.top:
lodsb
cmp al, NEWLINE
jz .done
cmp al, SPACE ; skip spaces
jz .skipws
cmp al, TAB ; skip tabs, too
jz .skipws
stosb ; neither of those, so store it
.skipws:
loop .top
jmp .exit
.done:
xor eax,eax ; write terminating NUL char
stosb
dec ecx
.exit:
mov eax, ecx
pop rcx
ret
_start:
mov rax, [rsp] ; load argc into rax
mov edx, err_no_args
mov ebx, err_no_args_len
cmp eax, 2 ; have to have at least one argument
js error_exit
openfile [rsp+16] ; try opening the file named in the argument
mov edx, err_open
mov ebx, err_open_len
cmp eax,0
js error_exit
readfile rax, buffer, BUFFSIZE ; read the entire file into memory
mov edx, err_read
mov ebx, err_read_len
cmp eax,0
js error_exit
je no_bytes
main:
mov rdi, outbuf ; rdi points to current output buffer
cmp qword [rsp], 3 ; if there are two arguments, we are disassembling
jne assemble
mov ecx, eax
shr rcx, 1 ; convert from num bytes to num words
jz no_bytes
disassemble:
; ecx = number of remaining bytes in source
; esi = pointer to source buffer
; rdi = current pointer to output buffer
lodsw ; get the next word to disassemble
push rsi
push rcx ; remember updated count and pointer
test ah, 0x80 ; is this a type A instruction?
jnz .decode_c
mov byte [rdi], '@' ; it's a type A which is "@nnnn"
inc rdi
call _numberToString
jmp .emitLine
; it's a type C instruction of the form "dest=comp;jmp"
.decode_c:
mov ebx, eax ; save opcode in ebx
decodeOpcodePart DESTMASK, hackdest
decodeOpcodePart COMPMASK, hackcomp
decodeOpcodePart JUMPMASK, hackjump
; at this point we expect rcx (input len) and rsi (input ptr) on the stack,
; rdi : points to current end of output buffer
.emitLine:
pop rcx
pop rsi
mov al,NEWLINE
stosb
dec rcx
jnz disassemble
jmp write_output
assemble:
; eax = number of remaining bytes in source
; esi = pointer to source buffer
; rdi = current pointer to output buffer
mov r10, rdi ; temporarily save output pointer
mov edi, line
call getline ; copy a line, omitting whitespace
push rax
push rsi
mov rsi, line
cmp byte [rsi], '@' ; is this a type A instruction?
jnz .c_instruction ; if not, assume it's type C
inc rsi
call _stringToNumber
jmp .storeOpcode
.c_instruction:
; it's a C instruction
; rsi is already pointing to line
xor eax, eax
encodeOpcodePart hackdest
encodeOpcodePart hackcomp
encodeOpcodePart hackjump
.storeOpcode:
mov rdi, r10 ; restore output pointer
stosw
pop rsi
pop rax
test rax,rax
jne assemble
write_output:
; rdi = current pointer to output buffer
mov rdx, rdi
sub rdx, outbuf
print STDOUT, outbuf, rdx
mov edx, err_write
mov ebx, err_write_len
cmp eax,0
js error_exit
exit 0
no_bytes:
mov edx, err_zero ; if the file was empty, it's an error
mov ebx, err_zero_len
; fall through to error_exit
error_exit:
print STDERR, rdx, rbx
exit 1
section .data
struc Operation
.string: resb 8
.strlen: resw 1
.value: resw 1
endstruc
; opcode "string", value
%macro opcode 2
istruc Operation
at Operation.string, db %1
%strlen len %1
%rep 8 - len
db 0
%endrep
at Operation.strlen, dw len
at Operation.value, dw %2
iend
%endmacro
DESTMASK equ 0x38
COMPMASK equ 0xffc0
JUMPMASK equ 0x07
hackdest:
opcode "AMD=", 0x38
opcode "AD=", 0x30
opcode "AM=", 0x28
opcode "MD=", 0x18
opcode "M=", 0x08
opcode "D=", 0x10
opcode "A=", 0x20
opcode "", 0
hackcomp:
opcode "D+1", 0xe7c0
opcode "A+1", 0xedc0
opcode "M+1", 0xfdc0
opcode "D-1", 0xe380
opcode "A-1", 0xec80
opcode "M-1", 0xfc80
opcode "D+A", 0xe080
opcode "D+M", 0xf080
opcode "D-A", 0xe4c0
opcode "D-M", 0xf4c0
opcode "A-D", 0xe1c0
opcode "M-D", 0xf1c0
opcode "D&A", 0xe000
opcode "D&M", 0xf000
opcode "D|A", 0xe540
opcode "D|M", 0xf540
opcode "-1", 0xee80
opcode "!D", 0xe340
opcode "!A", 0xec40
opcode "!M", 0xfc40
opcode "-D", 0xe3c0
opcode "-A", 0xecc0
opcode "-M", 0xfcc0
opcode "0", 0xea80
opcode "1", 0xefc0
opcode "D", 0xe300
opcode "A", 0xec00
opcode "M", 0xfc00
opcode "", 0
hackjump:
opcode ";JGT", 0x1
opcode ";JEQ", 0x2
opcode ";JGE", 0x3
opcode ";JLT", 0x4
opcode ";JNE", 0x5
opcode ";JLE", 0x6
opcode ";JMP", 0x7
opcode "", 0
```
1 Answer 1
Commenting is good
There are a number of inaccuracies though in the documentation, several of these are probably related to copy-pasting:
- In the decodeOpcodePart macro the current opcode to be decoded is in RBX (so not in RAX), and it is RAX that gets trashed (so not RBX).
- The encodeOpcodePart macro mentions that it 'takes mask, and pointer to table as arguments' where in fact there's but a single pointer argument.
- The _tableLookup subroutine forgets to mention that it additionally trashes RCX.
- The _stringToNumber subroutine mentions that RBX and RCX get trashed where in fact both these registers are preserved on the stack.
- The _numberToString subroutine mentions that on exit RDI 'points to one past end of written string' where in fact RDI points just past the end of the written string (so not 'one past' like it is the case in the _stringToNumber subroutine).
- The _numberToString subroutine forgets to mention that it additionally trashes RAX and RDX.
- At the disassemble label ECX holds the number of remaining words in source (so not bytes).
For ease of reviewing, the TRASHED displays of the decode and encode macros should include the registers that get trashed in the subroutines that they call. Same goes for the ENTRY and EXIT displays.
Avoiding the use of "magic numbers"
In your answer to the 'rags' question you advocate to avoid the use of "magic numbers", but in adding the missing definition for exit
, you introduced your own magic number mov eax, 60
. Perhaps use SYSTERM equ 60
.
Fix the bug
In the _stringToNumber subroutine you negate your final multiplier (+1 -> -1) if a minus character was found in the text. However you forget to load the character that follows the minus. That has to be the first digit that the .numeric loop processes.
Missed optimizations
- When, in the _stringToNumber subroutine you negate your final multiplier (+1 -> -1), you use
dec ecx
dec ecx
which takes 4 bytes, whereas a singleneg ecx
would do it in 2 bytes. - When, in the _stringToNumber subroutine you validate and convert a digit, you can safely remove the
jb .done
instruction. The byte wraparound will take care of it in thecmp al, 10
that immediately follows. - All occurencies of
cmp eax, 0
can be written astest eax, eax
which has a 1 byte shorter encoding, at least if you assemble with-Ox
. Without optimizations it would be a lot worse. - Because the high dword of RCX is zeroed, we can replace the 3-byte
shr rcx, 1
instruction by the 2-byteshr ecx, 1
instruction. - It is 1 byte shorter to write
test ah, ah
js .decode_c
instead oftest ah, 0x80
jnz .decode_c
. - It is 4 bytes shorter to temporarily save the output pointer via
push rdi
...pop rdi
than to usemov r10, rdi
...mov rdi, r10
. - If
mov edi, line
works fine, thenmov rsi, line
can safely becomemov esi, line
. This shaves off the REX prefix and opens up a world of similar REX-related optimizations that sometimes you do and sometimes you don't. I can't quite decide whether you care about codesize, execution speed, both, or none.
Use efficient instructions
Intel advices against using complex instructions like loop
. In _getline you could replace loop .top
with dec ecx
jnz .top
.
Think carefully about jumps
In your answer to the 'rags' question you warn about jumps being a relatively costly operation. Right so, but in the _stringToNumber subroutine you commit the same crime.
Next is my optimized rewrite of this conversion:
; given a pointer to a decimal number string, convert to the value in rax
; ENTRY:
; rsi : points to strz decimal number string
; EXIT:
; rax : contains converted value
; rsi : points to past ending character
; TRASHED:
; rdx
_stringToNumber:
xor eax, eax
jmp .first ; A one-time jump
.digit:
inc esi
lea eax, [rax + rax * 4] ; (Number * 5) * 2 + Newdigit
lea eax, [rdx + rax * 2]
.first:
movzx edx, byte [rsi]
sub edx, '0'
cmp dl, 10
jb .digit ; The only jump in this tight loop
ret
I verified that it is ok for the caller of this subroutine to have RDX trashed.
I removed support for the minus character based on the following quotes from the documentation:
... decimal (0-9) digits may be used to represent a non-negative, decimal constant in the range 0 through 32,767. The use of the minus sign to indicate a negative number is not allowed.
The A-instruction ... is the only means to introduce a (non-negative) numeric value into the computer under program control;
Avoid complicating matters
The setup that you do for rep movsb
in _numberToString seems convoluted.
I could simplify the code mostly by anticipating (before the loop) the upcoming rep movsb
that will require RCX, RSI, and RDI been setup. No need for an exchange between RSI and RDI, nor another addition of 5 to calculate RCX.
; given a 16-bit number in ax, convert to string in line
; ENTRY:
; eax : number to convert (high word is empty)
; rdi : points to destination buffer
; EXIT:
; rdi : points to past end of written string
; TRASHES:
; rax, rbx, rcx, rdx, rsi
_numberToString:
lea esi, [rdi + 5] ; Past end of the 5-byte buffer
mov ecx, esi
mov ebx, 10
.loopy:
dec esi
xor edx, edx
div ebx
add edx, '0'
mov [rsi], dl
test eax, eax
jnz .loopy
sub ecx, esi
rep movsb
ret
Your use of test rax, rax
made you insist on 'high 48 bits must be zero'. Writing test eax, eax
not only shortens the code but also relaxes this condition to bits 16-31.
I verified that it is ok for the caller of this subroutine to have RAX, RBX, RCX, RDX, and RSI trashed.
Use a data-driven structure
I very much like what you did with this: the Operation struc and the opcode macro.
But seeing that no mnemonic string has more than 4 characters, why did you choose for a string of 8 instead of 4? Couldn't the program benefit from the increased data density? I rewrote the entire program applying everything from this review (and more that I didn't feel like posting about), and I got a 25% reduction of the .data section and a 19% reduction of the .code section.
Who is right and who is wrong?
The nand2tetris document that you link to uses AMD=
and MD=
, but the wikipedia page that I consulted uses ADM=
and DM=
. Whom shall I trust?
-
\$\begingroup\$ I appreciate the review! As for the last point: the advantage to the current scheme is that it would be simple to add the other encodings to the list and it would accept both but only emit the first matching entry. My advice is to trust no one (especially those who write false comments!) :) \$\endgroup\$Edward– Edward2022年07月06日 19:58:51 +00:00Commented Jul 6, 2022 at 19:58
_
; ELF systems don't do that. e.g. C main's asm symbol name ismain
. (_start
is part of the implementation and thus uses a reserved name starting with a leading underscore to avoid polluting the global namespace.) \$\endgroup\$[edi + Operation.strlen]
instead of just requiring your caller to pass a 64-bit pointer as a starting point. Dynamic or stack memory will be outside the low 32 bits of virtual address space, unless you built this for the x32 ABI, ILP32 in long mode. You use 64-bitr10
in another function, rather than 32-bitr10d
, so that instruction only needs a REX prefix, not also a67
operand-size override to truncate the address. \$\endgroup\$