Re-implementing head in x86-64 assembly targeting linux

Question 1

I'm pretty new to x64 programming and tried to recreate parts of head for study (targeting linux).

Are there any stylistic issues?
The head function uses a buffer in the bss section for IO, but I've also seen people allocate buffers on the stack for similar purposes. Is there a benefit to allocating buffers on the stack vs. bss aside from recursive calls?

The program is tested by comparing its output on a few files in examples with the output of head.

Any feedback would be greatly appreciated.

Project layout:

.
├── examples
│  ├── empty
│  ├── ex01
│  ├── ex02
│  └── to_be
├── headx64.s
├── Makefile
├── README.md
├── test.sh
└── UNLICENSE

Makefile:

AS := as
LD := ld
BIN := headx64
.DELETE_ON_ERROR:
$(BIN): headx64.o
 $(LD) -o $@ $^
headx64.o: headx64.s
 $(AS) -o $@ $^ -ggdb
.PHONY: test clean
test: $(BIN)
 bash test.sh examples
clean:
 rm -rf $(BIN) *.o

test.sh:

#!/bin/bash
# PURPOSE: A simple test script.
# USAGE: To test headx64 on the contents of a directory, use call test.sh DIRECTORY.
directory=1ドル
echo Testing:
# Test single files
for file in `ls $directory`; do
 echo Testing $directory/$file...
 (head $directory/$file) >> tmp1
 (./headx64 $directory/$file) >> tmp2
 diff tmp1 tmp2 || echo head and headx64 differ on $file
done
# Test all files at once
echo Testing $directory/*...
(head $directory/*) >> tmp1
(./headx64 $directory/*) >> tmp2
diff tmp1 tmp2 || echo head and headx64 differ on all files
rm -f tmp1 tmp2
echo All test ran!

headx64.s:

# PURPOSE: Simple copy of the head command line utility.
# USAGE: headx64 [FILE ...]
# AUTHOR: Me ;)
# LICENSE: This code is part of the public domain.
#---CONSTANTS---#
# Syscalls
.equ SYS_READ, 0
.equ SYS_WRITE, 1
.equ SYS_OPEN, 2
.equ SYS_CLOSE, 3
.equ SYS_EXIT, 60
# File descriptors
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2
# Status codes
.equ SUCCESS, 0
.equ FILE_ERROR, 1
.equ IO_ERROR, 2
#---DATA---#
 .section .data
# Strings for header
header_left:
 .asciz "==> "
header_right:
 .asciz " <==\n"
# String representing stdin
file_stdin:
 .asciz "-"
# Error messages
file_error_msg:
 .asciz "couldn't open file!\n"
io_error_msg:
 .asciz "io error!\n"
line_break:
 .asciz "\n"
#---BUFFERS---#
 .section .bss
# Buffer used in i/o.
.equ BUFFER_SIZE, 256
.lcomm buffer, BUFFER_SIZE
 .section .text
 .globl _start
#---MAIN ROUTINE---#
_start:
 # Register usage:
 # (rbx) - argc
 # (r12) - Argument in argv.
 mov (%rsp), %rbx
 mov 1,ドル %r12
 # Only a single file (or stdin)?
 cmp 2,ドル %rbx
 jle .L_single_file
.L_args_loop:
 # Print header
 mov $header_left, %rdi
 mov $STDOUT, %rsi
 call fputs
 cmp 0,ドル %rdx
 jl .L_io_error
 mov 8(%rsp, %r12, 8), %rdi
 mov $STDOUT, %rsi
 call fputs
 cmp 0,ドル %rdx
 jl .L_io_error
 mov $header_right, %rdi
 mov $STDOUT, %rsi
 call fputs
 cmp 0,ドル %rdx
 jl .L_io_error
.L_open_file:
 # Open file
 mov 8(%rsp, %r12, 8), %rdi
 call open_rdonly
 cmp 0,ドル %rax
 jl .L_file_error
 # Print file content
 mov %rax, %rdi
 call head
 cmp 0,ドル %rax
 jl .L_io_error
 # All files read?
 inc %r12
 cmp %rbx, %r12
 jge .L_success
 # Print nl and continue
 mov $line_break, %rdi
 mov $STDOUT, %rsi
 call fputs
 cmp 0,ドル %rdx
 jl .L_io_error
 # Repeat
 jmp .L_args_loop
.L_single_file:
 # Open file
 mov $file_stdin, %rdi # read from stdin if no file was given
 cmp 2,ドル %rbx
 cmoveq 8(%rsp, %r12, 8), %rdi
 call open_rdonly
 cmp 0,ドル %rax
 jl .L_file_error
 # Print file content
 mov %rax, %rdi
 call head
 cmp 0,ドル %rax
 jl .L_io_error
 # Exit successfully
.L_success:
 mov $SUCCESS, %rdi
 mov $SYS_EXIT, %rax
 syscall
 # Print error message and exit
.L_file_error:
 mov $file_error_msg, %rdi
 mov $STDOUT, %rsi
 call fputs
 mov $FILE_ERROR, %rdi
 mov $SYS_EXIT, %rax
 syscall
.L_io_error:
 mov $io_error_msg, %rdi
 mov $STDOUT, %rsi
 call fputs
 mov $IO_ERROR, %rdi
 mov $SYS_EXIT, %rax
 syscall
#---SUBROUTINES---#
# Function open_rdonly
# Opens a file for reading, returns stdin if path == "-".
# PARAMETERS:
# path (rdi) - Path as a pointer to null-terminated string.
# RETURNS:
# (rax) - File descriptor on success, -1 otherwise.
.type open_rdonly, @function
open_rdonly:
 # path == "-"?
 cmpb $'-', (%rdi)
 jne .L_not_stdin
 cmpb 0,ドル 1(%rdi)
 jne .L_not_stdin
 mov $STDIN, %rax
 ret
.L_not_stdin:
 # Open path
 .equ O_READONLY, 00
 .equ NO_MODE, 00
 mov $O_READONLY, %rsi
 mov $O_READONLY, %rdx
 mov $SYS_OPEN, %rax
 syscall
 ret
# Function fputs
# Prints a null-terminated string to file.
# PARAMETERS:
# s (rdi) - Pointer to null-terminated string.
# file (rsi) - File descriptor.
# RETURNS:
# (rax) - 0 if no error occurred and -1 otherwise.
.type fputs, @function
fputs:
 # Find string length
 xor %rdx, %rdx
.L_strlen:
 inc %rdx
 cmpb 0,ドル -1(%rdi, %rdx)
 jne .L_strlen
 dec %rdx
 # Print string
 xchg %rsi, %rdi
 push %rdx # Save strlen for later
 mov $SYS_WRITE, %rax
 syscall
 # Error?
 pop %rdx
 cmp %rdx, %rax
 jl .L_puts_error
 mov 0,ドル %rax
 ret
 
.L_puts_error:
 mov $-1, %rax
 ret
# Function head
# Prints the first 10 lines in file to stdout.
# PARAMETERS:
# file (rdi) - File descriptor.
# RETURNS:
# (rax) - Returns 0 if no io error occurred and -1 otherwise.
.type head, @function
head:
 # The algorithm works by iterating through the buffer until a nl is found.
 # Register usage:
 # rdx - Input file.
 # r12 - Lines found.
 # r13 - No. of characters in buffer.
 # r14 - Position in buffer.
 # Save non-volatile registers
 push %r12
 push %r13
 push %r14
 # No lines found yet
 xor %r12, %r12
.L_fill_buffer:
 # Reset position
 xor %r14, %r14
 # Read buffer
 push %rdi # Save fd
 mov $buffer, %rsi
 mov $BUFFER_SIZE, %rdx
 mov $SYS_READ, %rax
 syscall
 pop %rdi
 mov %rax, %r13
 # At EOF or error?
 cmp 0,ドル %rax
 je .L_head_done
 jl .L_head_error
.L_next_char:
 inc %r14
 # Print buffer and refresh if all characters were visited
 cmp %r13, %r14
 jg .L_print_and_refresh
 # Exit if we've read 10 lines and continue otherwise
 cmpb $'\n', buffer-1(%r14)
 jne .L_next_char
 inc %r12
 cmp 10,ドル %r12
 je .L_head_done
 jmp .L_next_char
.L_print_and_refresh:
 push %rdi # Save fd
 mov $STDOUT, %rdi
 mov $buffer, %rsi
 mov %r13, %rdx
 mov $SYS_WRITE, %rax
 syscall
 pop %rdi
 # Error?
 cmp %r13, %rax
 jl .L_head_error
 # Refill buffer and continue
 jmp .L_fill_buffer
.L_head_error:
 # Restore non-volatile registers & return -1
 pop %r14
 pop %r13
 pop %r12
 mov $-1, %rax
 ret
.L_head_done:
 # Print remaining characters
 mov $STDOUT, %rdi
 mov $buffer, %rsi
 mov %r14, %rdx
 mov $SYS_WRITE, %rax
 syscall
 # Error?
 cmp %rax, %r14
 jne .L_head_error
 # Restore non-volatile registers & return 0
 pop %r14
 pop %r13
 pop %r12
 mov 0,ドル %rax
 ret

Question 2

Are there any stylistic issues?

You can enhance the readability of your program a lot by aligning the instructions, the operands, and the tailcomments to their own columns:

push %rdx # Preserve strlen
mov $SYS_WRITE, %eax
syscall # -> RAX
pop %rdx # Restore strlen

Seeing those nice #---CONSTANTS---#, #---DATA---#, and #---BUFFERS---# comments, it feels like the next comment is missing:

#---CODE---#
 .section .text
 .globl _start

I've also seen people allocate buffers on the stack for similar purposes. Is there a benefit to allocating buffers on the stack vs. bss aside from recursive calls?

Personal preference probably, but I'd say that working from a general purpose buffer is easiest. Though it is not too difficult to have a buffer in stack memory, the occasional push (like the one you were doing in .L_print_and_refresh) might make it a bit more error-prone and require more attention to detail.

The fputs function

Although you state that this function returns RAX = {-1,0}, four of its six callers instead are inspecting the RDX register! It is your own function so it is your choice: either inspect RAX or else state that you return RDX.

Because of the way you pass parameters RDI and RSI, you had to include the xchg %rsi, %rdi instruction prior to the SYS_WRITE system call. I suggest you modify all of the call sites so you pass the string pointer in RSI and the file descriptor in RDI which I believe is also a bit more common practice.

# Function fputs
# Prints a null-terminated string to file.
# PARAMETERS:
# s (rsi) - Pointer to null-terminated string.
# file (rdi) - File descriptor.
# RETURNS:
# (rax) - 0 if no error occurred and -1 otherwise.
.type fputs, @function
fputs:
 xor %edx, %edx # Find string length
.L_strlen:
 inc %edx
 cmpb 0,ドル -1(%rsi, %rdx)
 jne .L_strlen
 dec %edx
 push %rdx # Print string
 mov $SYS_WRITE, %eax
 syscall # -> RAX
 pop %rdx
 cmp %rdx, %rax
 mov 0,ドル %eax # RAX=0
 je .L_puts_ok
 dec %rax # RAX=-1
.L_puts_ok:
 ret

Use 32-bit registers whenever you can, but keep using 64-bit registers within addressing modes and for pushes and pops. Your string length is never going to exceed 4GB so you don't need the extra space that RDX offers. Remember that writing to the lowest 32 bits of a 64-bit register automatically zeroes the highest 32 bits. The benefit for us is that the assembler doesn't need to insert a REX prefix while encoding the instruction, and smaller code in general runs quicker.

The open_rdonly function

.equ O_READONLY, 00
.equ NO_MODE, 00
mov $O_READONLY, %rsi
mov $O_READONLY, %rdx

It is strange to see you loaded both %RSI and %RDX with the same constant. Also probably better to keep these equates in the dedicated CONSTANTS section.
You don't need two separate byte-sized compares to find out about stdin. Use cmpw $'-0円', (%rdi).

# Function open_rdonly
# Opens a file for reading, returns stdin if path == "-".
# PARAMETERS:
# path (rdi) - Path as a pointer to null-terminated string.
# RETURNS:
# (rax) - File descriptor on success, -1 otherwise.
.type open_rdonly, @function
open_rdonly:
 mov $STDIN, %eax
 cmpw $'-0円', (%rdi) # path == "-" ?
 je .L_stdin
 mov $..., %esi
 mov $..., %edx
 mov $SYS_OPEN, %eax
 syscall
.L_stdin:
 ret

The head function

 # Error?
 cmp %r13, %rax
 jl .L_head_error
 # Refill buffer and continue
 jmp .L_fill_buffer
.L_head_error:

Because .L_head_error immediately follows, it is easy to avoid the extra jmp instruction. Just inverse the conditional branch:

 cmp %r13, %rax # Error?
 jnl .L_fill_buffer # Refill buffer and continue
.L_head_error:

Sep Roland Sep RolandSep Roland 4,78317 silver badges28 bronze badges · Accepted Answer · 2024-09-25 18:54:33Z

Are there any stylistic issues?

You can enhance the readability of your program a lot by aligning the instructions, the operands, and the tailcomments to their own columns:

push %rdx # Preserve strlen
mov $SYS_WRITE, %eax
syscall # -> RAX
pop %rdx # Restore strlen

Seeing those nice #---CONSTANTS---#, #---DATA---#, and #---BUFFERS---# comments, it feels like the next comment is missing:

#---CODE---#
 .section .text
 .globl _start

I've also seen people allocate buffers on the stack for similar purposes. Is there a benefit to allocating buffers on the stack vs. bss aside from recursive calls?

Personal preference probably, but I'd say that working from a general purpose buffer is easiest. Though it is not too difficult to have a buffer in stack memory, the occasional push (like the one you were doing in .L_print_and_refresh) might make it a bit more error-prone and require more attention to detail.

The fputs function

Although you state that this function returns RAX = {-1,0}, four of its six callers instead are inspecting the RDX register! It is your own function so it is your choice: either inspect RAX or else state that you return RDX.

Because of the way you pass parameters RDI and RSI, you had to include the xchg %rsi, %rdi instruction prior to the SYS_WRITE system call. I suggest you modify all of the call sites so you pass the string pointer in RSI and the file descriptor in RDI which I believe is also a bit more common practice.

# Function fputs
# Prints a null-terminated string to file.
# PARAMETERS:
# s (rsi) - Pointer to null-terminated string.
# file (rdi) - File descriptor.
# RETURNS:
# (rax) - 0 if no error occurred and -1 otherwise.
.type fputs, @function
fputs:
 xor %edx, %edx # Find string length
.L_strlen:
 inc %edx
 cmpb 0,ドル -1(%rsi, %rdx)
 jne .L_strlen
 dec %edx
 push %rdx # Print string
 mov $SYS_WRITE, %eax
 syscall # -> RAX
 pop %rdx
 cmp %rdx, %rax
 mov 0,ドル %eax # RAX=0
 je .L_puts_ok
 dec %rax # RAX=-1
.L_puts_ok:
 ret

Use 32-bit registers whenever you can, but keep using 64-bit registers within addressing modes and for pushes and pops. Your string length is never going to exceed 4GB so you don't need the extra space that RDX offers. Remember that writing to the lowest 32 bits of a 64-bit register automatically zeroes the highest 32 bits. The benefit for us is that the assembler doesn't need to insert a REX prefix while encoding the instruction, and smaller code in general runs quicker.

The open_rdonly function

.equ O_READONLY, 00
.equ NO_MODE, 00
mov $O_READONLY, %rsi
mov $O_READONLY, %rdx

It is strange to see you loaded both %RSI and %RDX with the same constant. Also probably better to keep these equates in the dedicated CONSTANTS section.
You don't need two separate byte-sized compares to find out about stdin. Use cmpw $'-0円', (%rdi).

# Function open_rdonly
# Opens a file for reading, returns stdin if path == "-".
# PARAMETERS:
# path (rdi) - Path as a pointer to null-terminated string.
# RETURNS:
# (rax) - File descriptor on success, -1 otherwise.
.type open_rdonly, @function
open_rdonly:
 mov $STDIN, %eax
 cmpw $'-0円', (%rdi) # path == "-" ?
 je .L_stdin
 mov $..., %esi
 mov $..., %edx
 mov $SYS_OPEN, %eax
 syscall
.L_stdin:
 ret

The head function

 # Error?
 cmp %r13, %rax
 jl .L_head_error
 # Refill buffer and continue
 jmp .L_fill_buffer
.L_head_error:

Because .L_head_error immediately follows, it is easy to avoid the extra jmp instruction. Just inverse the conditional branch:

 cmp %r13, %rax # Error?
 jnl .L_fill_buffer # Refill buffer and continue
.L_head_error:

Stack Exchange Network

Re-implementing head in x86-64 assembly targeting linux

Project layout:

Makefile:

test.sh:

headx64.s:

1 Answer 1

The fputs function

The open_rdonly function

The head function

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Re-implementing head in x86-64 assembly targeting linux

Project layout:

Makefile:

test.sh:

headx64.s:

1 Answer 1

The fputs function

The open_rdonly function

The head function

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions