We were asked to create a Base64 encoder for Assembly x86-64 on Linux. Was wondering how my code below could be improved, be it notation or anything else. We only had Assembly for 3 months so I'm not really that used to any kind of particular notation. That's a reason why I'm asking. Thanks for your help !
SECTION .data
Base64Table: db "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
SECTION .bss
byteStorage: resb 30000
bytesToReadAtOnce: equ 30000
b64EncStor: resb 40000
b64EncStorLen: equ $ - b64EncStor
SECTION .text
global _start
_start:
;sys read put everything from the file into the buffer "byteStorage"
mov rax, 0
mov rdi, 0
mov rsi, byteStorage
mov rdx, bytesToReadAtOnce
syscall
xor r11, r11 ; syscall strangely changes r11
xor r12, r12 ;r12 will keep track of index in byteStorage array
mov r13, 0 ;r13 will keep track of index in b64EncStor array
.encodingInProgress:
cmp rax, 0
je .weHaveFinished ;if no bits remaining, and no extra one or two
;bytes, we simply jump to the end
dec rax
inc r12
mov r8b, [byteStorage + r12 -1] ; put each input char in a register each
mov r11b, r8b
shr r11b, 2
and r11b, 0x3F
mov r11b, [Base64Table + r11]
mov [b64EncStor + r13], r11b ; our first char is now encoded
inc r13
cmp rax, 0 ;if rax = 0, rax was one before above decrementation, so we jump
je .oneExtraByte ;to .oneExtraByte
;char two
dec rax
inc r12
mov r9b, [byteStorage + r12-1] ; put each input char in a register each
and r8b, 0x3
shl r8b,4
mov r11b, r9b
shr r11b, 4
and r11b, 0xF
add r8b, r11b
mov r8b, [Base64Table + r8]
mov [b64EncStor+r13], r8b ; second char now encoded
inc r13
cmp rax, 0 ;rax was two before being decremented twice above, so we
je .twoExtraBytes ;jump to .twoExtraBytes
;char three
dec rax
inc r12
mov r10b, [byteStorage + r12-1] ;put each input char in a register each
and r9b, 0xF
shl r9b, 2
mov r8b, r10b
shr r8b, 6
and r8b, 0x3
add r9b, r8b
mov r9b, [Base64Table + r9]
mov [b64EncStor+r13], r9b ; third char now encoded
inc r13
;char four
and r10b, 0x3F
mov r10b, [Base64Table + r10]
mov [b64EncStor+r13], r10b ; fourth char now encoded
inc r13
jmp .encodingInProgress
;--------
.oneExtraByte: ;so we need four (and not two !) bits more to reach 12
shl r8b, 4
and r8b, 0x3F ;only keep six bits from left, the two most right are zero
mov r8b, [Base64Table + r8]
mov [b64EncStor + r13], r8b
inc r13
mov r8b, "=" ;add two extra equal signs
mov [b64EncStor + r13], r8b
inc r13
mov [b64EncStor + r13], r8b
inc r13
jmp .weHaveFinished
;------
.twoExtraBytes: ;so we need two (and not four !) bits more to reach 18
;inc r12
mov r10b, [byteStorage + r12-1] ;put each input char in a register each
shl r10b, 2
and r10b, 0x3F ;only keep six bits from left, the two most right are zero
mov r10b, [Base64Table + r10]
mov [b64EncStor + r13], r10b
inc r13
mov r8b, "=" ;add one extra equal sign
mov [b64EncStor + r13], r8b
inc r13
jmp .weHaveFinished
;--------
.weHaveFinished:
;syscall for write, to output the result
mov rax, 1
mov rdi, 1
mov rsi, b64EncStor
mov rdx, r13
syscall
xor r12,r12
mov rax, 60 ; System call for exit
mov rdi, 0
syscall
-
\$\begingroup\$ Feel free to comment if anything is unclear \$\endgroup\$user197730– user1977302022年03月05日 18:39:23 +00:00Commented Mar 5, 2022 at 18:39
1 Answer 1
Wiping registers
mov rax, 0 mov rdi, 0 xor r11, r11 xor r12, r12 mov r13, 0
The preferred way to clear a register is to use the xor reg, reg
instruction. It's small and fast. From the above it would seem that you knew this already but didn't apply it consistently. But there's more to it than just using xor
. It is best to only have xor
operate on the low 32 bits because the CPU will zero the high 32 bits automatically. For the 'old' registers (RAX
, RBX
, ... ) this will shave off a REX prefix:
xor eax, eax
xor edi, edi
xor r11d, r11d
xor r12d, r12d
xor r13d, r13d
A lurking danger
mov r8b, [Base64Table + r8] mov r9b, [Base64Table + r9] mov r10b, [Base64Table + r10]
Your program only ever writes to the lowest byte of the r8
, r9
, and r10
registers. There's no guarantee whatsoever that the whole 64-bit register will be suitable for indexing like you plan. Best add the following to your wipe list:
xor r8d, r8d
xor r9d, r9d
xor r10d, r10d
Redundant operations
shr r11b, 2 and r11b, 0x3F
The shr
instruction already cleared the 2 topmost bits. There's no need for the and
instruction that would do the same thing.
and r8b, 0x3 shl r8b,4 mov r11b, r9b shr r11b, 4 and r11b, 0xF add r8b, r11b
Here the same redundancy with shifting and anding r11b
. In this case however you can consolidate both these and
's:
shl r8b, 4
mov r11b, r9b
shr r11b, 4
add r8b, r11b
and r8b, 0x3F
and r9b, 0xF shl r9b, 2 mov r8b, r10b shr r8b, 6 and r8b, 0x3 add r9b, r8b
And again the same redundancy with shifting and anding r8b
. Consolidating both and
's gives:
shl r9b, 2
mov r8b, r10b
shr r8b, 6
add r9b, r8b
and r9b, 0x3F
jmp .weHaveFinished ;-------- .weHaveFinished:
The code can just as well fall through at this point. The jmp
is redundant.
Optimizations
dec rax ... cmp rax, 0 ;if rax = 0, rax was one before above decrementation je .oneExtraByte ; we jump to .oneExtraByte
You can safely delay executing the dec rax
instruction. The code at the ellipsis doesn't depend on the value that's in RAX
. Instead of inspecting using cmp
, inspect the flags from using dec
. Apply this trick 3 times:
test rax, rax ; `TEST RAX, RAX` is preferred over `CMP RAX, 0`
jz .weHaveFinished
.encodingInProgress:
...
dec rax
jz .oneExtraByte
...
dec rax
jz .twoExtraBytes
...
dec rax
jnz .encodingInProgress
jmp .weHaveFinished
;----------------------------------
.oneExtraByte:
mov [b64EncStor + r13], r8b inc r13 mov r8b, "=" ;add two extra equal signs mov [b64EncStor + r13], r8b inc r13 mov [b64EncStor + r13], r8b inc r13 jmp .weHaveFinished
All of this incrementing on r13
is overkill. The code is about to end anyway. Just write it like:
mov [b64EncStor + r13], r8b
mov word [b64EncStor + r13 + 1], "=="
jmp .weHaveFinished
Style
To improve readability, you could start all of your tail-comments at the same column. (same goes for instruction mnemonics and operands)
Also be consistent with how you use whitespace. eg. I see the following:
mov [b64EncStor + r13], r11b ; our first char is now encoded mov [b64EncStor+r13], r8b ; second char now encoded mov [b64EncStor + r13], r8b
Wouldn't you agree that
mov [b64EncStor + r13], r11b ; our first char is now encoded
mov [b64EncStor + r13], r8b ; second char now encoded
mov [b64EncStor + r13], r8b
is nicer to look at?