The task is to do a byte-by-byte comparison and print whether the files are equal or not.
Code:
dosseg
newline macro
mov ah, 2
mov dl, 13
int 21h
mov dl, 10
int 21h
endm
err macro s
mov dx, offset s
mov ah, 9
int 21h
; Print the error code in AX as a character (decimal).
mov dl, al ; Get the low byte of AX.
add dl, '0' ; Convert to ASCII
mov ah, 2 ; Print character
int 21h
endm
prompt macro s
mov dx, offset s
mov ah, 9
int 21h
endm
info macro s
mov dx, offset s
mov ah, 9
int 21h
endm
getchar macro c
mov ah, 1 ; Read input
int 21h
endm
.model small
.stack 100h
.data
f1 db 128 dup(0) ; Null-terminated filename 1.
f2 db 128 dup(0) ; Filename 2.
h1 dw ? ; File handle returned from DOS.
h2 dw ? ; File handle 2.
prompt1 db 'Enter the first filename: $'
prompt2 db 'Enter the second filename: $'
oerr_msg db 'Error: Failed to open file. Error code: $'
rerr_msg db 'Error: Failed to read file. Error code: $'
eq_msg db 'The files are equal.$'
neq_msg db 'The files are unequal.$'
buf1 db 1 ; Buffer 1 to hold one byte of data.
buf2 db 1 ; Buffer 2 to hold one byte of data.
eof1 db 0 ; To denote if file 1 has reached EOF.
eof2 db 0 ; To denote if file 2 has reached EOF.
.code
main proc
mov ax, @data
mov ds, ax
prompt prompt1
mov si, offset f1
; --- Input the filenames. ---
read:
getchar
cmp al, 13 ; EOL?
je next
mov [si], al ; Store the byte.
inc si
jmp read
next:
prompt prompt2
mov si, offset f2
read2:
getchar
cmp al, 13
je open
mov [si], al
inc si
jmp read2
open:
; --- Open the files using INT 21h ----
mov ah, 3dh ; Open file.
mov al, 0 ; Read-only mode.
mov dx, offset f1
int 21h
jc open_error
mov h1, ax ; File handle returned in AX.
mov ah, 3dh
mov al, 0
mov dx, offset f2
int 21h
jc open_error
mov h2, ax
jmp read_files
open_error:
err oerr_msg
jmp done
; --- Read both files whilst doing a byte-by-byte comparison ---
read_files:
mov ah, 3fh ; Read file.
mov bx, h1 ; First file's handle.
mov dx, offset buf1 ; Pointer to the buffer where the byte will be stored.
mov cx, 1 ; Read 1 byte at a time.
int 21h
jc read_error
cmp ax, cx ; EOF reached?
jne set_eof1
jmp read_file2
set_eof1:
mov eof1, 1 ; Set eof1 to true.
read_file2:
; --- Read from the second file ---
mov ah, 3fh ; Read file.
mov bx, h2 ; First file's handle.
mov dx, offset buf2 ; Pointer to the buffer where the byte will be stored.
mov cx, 1 ; Read 1 byte at a time.
int 21h
jc read_error
cmp ax, cx ; EOF reached?
jne set_eof2
jmp compare_files
set_eof2:
mov eof2, 1
compare_files:
; If eof1 is set:
; If eof2 is set:
; Cleanup and exit. The files were equal.
; Else:
; Cleanup and exit. The files were not equal. We reached EOF for one file.
; Else if eof2 is set:
; Cleanup and exit. The files were not equal. We reached EOF for one file.
; Continue comparing characters.
; Check if EOF is set for file 1.
mov dl, eof1
cmp dl, 1
je eof1_set
; Check if EOF if set for file 2
mov dl, eof2
cmp dl, 1
; The second file has ended, so files are not equal.
je files_not_equal
; Continue comparing characters.
mov dl, buf1
cmp dl, buf2
je read_files
; Bytes are not equal. Bail out.
jmp files_not_equal
eof1_set:
; If EOF for file 1 is set, check if EOF for file 2 is also set.
mov dl, eof2
cmp dl, 1
je files_equal
; Else, one file has ended, so files are not equal.
jmp files_not_equal
files_equal:
info eq_msg
jmp cleanup
files_not_equal:
info neq_msg
jmp cleanup
read_error:
err rerr_msg
jmp done
cleanup:
; --- Close files ----
mov ah, 3eh
mov bx, h1
int 21h
mov ah, 3eh
mov bx, h2
int 21h
done:
newline
mov ah, 4ch
int 21h
main endp
end main
Review Request:
Redundancies, duplication, simplifications. I haven't used assembly before, so anything and everything is welcome.
2 Answers 2
i - Still reading a byte at a time
err macro s mov dx, offset s mov ah, 9 int 21h ; Print the error code in AX as a character (decimal). mov dl, al ; Get the low byte of AX. add dl, '0' ; Convert to ASCII mov ah, 2 ; Print character int 21h endm
Sadly, there's one error in the err macro! Despite the fact that the documentation might say otherwise, the DOS.PrintString function 09h does clobber the AL register. It returns with the character "$" in AL. This means your displaying of the error code currently can't possibly work. Another problem here is that simply adding '0' is enough for error codes from 1 to 9, but not for the many remaining error codes. Luckily DOS.OpenFile and DOS.ReadFile don't need to return error codes above 6.
The prompt and info macros are identical. Why not have just one and name it string?
Whenever similar tasks need to run, using a subroutine is often better. Retrieving the filenames AND opening the files is a good example. My OpenEx subroutine combines both these operations and your main code invokes the subroutine twice, each time with a different set of arguments (here in SI and DI):
...
string prompt1
mov si, offset f1
mov di, offset h1
call OpenEx
string prompt2
mov si, offset f2
mov di, offset h2
call OpenEx
...
done:
newline
mov ax, 4C00h
int 21h
; -------------------------
OpenEx:
; --- Input the filenames. ---
push si ; (1)
getname:
getchar ; -> AL
cmp al, 13 ; EOL?
je end_of_name
mov [si], al ; Store the byte.
inc si
jmp getname
end_of_name:
pop si ; (1)
; --- Open the files using INT 21h ----
mov dx, si
mov ax, 3D00h ; Open file in Read-only mode.
int 21h
jc open_error
mov [di], ax ; File handle returned in AX.
ret
open_error:
err oerr_msg
jmp done
; -------------------------
main endp
end main
You can simplify the task of checking for end-of-files by storing different values in eof1 and eof2. I suggest setting eof1 = 1
and eof2 = 2
. A simple OR then can tell you if it is safe to compare bytes or that halting is in order:
mov al, eof1 ; {0,1}
or al, eof2 ; {0,2}
jnz detected_eof ; AL is {1,2,3}
mov al, buf1 ; Continue comparing characters.
cmp al, buf2
je read_files
files_not_equal: ; Bytes are not equal. Bail out.
string neq_msg
jmp cleanup
detected_eof:
jnp files_not_equal ; AL is {1,2} Only ONE of the files ended
files_equal: ; AL is {3} BOTH files ended together
string eq_msg
jmp cleanup
The jnp
conditional jump, checks the parity flag. If the number of ON bits in a byte (here it was AL from using or al, eof2
) is odd then the parity is said to be "off" aka "no parity". If the number of ON bits in a byte is even then the parity is said to be "on" aka "parity".
0 00000000b parity on
1 00000001b parity off
2 00000010b parity off
3 00000011b parity on
Also note that I used AL instead of the DL register that you wrote. This uses a one byte shorter encoding in mov al, eof1
and in mov al, buf1
.
ii - It gets simpler if we read more at once
It is certainly a valuable idea to read more than one byte at a time, and do know that DOS already does that too internally! DOS uses several 512-bytes buffers for the purpose of disk I/O.
Next is my implementation of this idea. If you study my code, you'll see a lot more stuff that I didn't include in my answer so far, eg. err is no longer a macro, but a full-fledged subroutine, and the buffers get re-used for different purposes.
dosseg
newline macro
mov ah, 2
mov dl, 13
int 21h
mov dl, 10
int 21h
endm
string macro s
mov dx, offset s
mov ah, 9
int 21h
endm
.model small
.stack 100h
.data
prompt1 db 'Enter the first filename: $'
prompt2 db 'Enter the second filename: $'
oerr_msg db 'Error: Failed to open file. Error code: $'
rerr_msg db 'Error: Failed to read file. Error code: $'
eq_msg db 'The files are equal.$'
neq_msg db 'The files are unequal.$'
buf1 db 512 dup(?) ; Buffer 1 to hold 512 bytes of data.
buf2 db 512 dup(?) ; Buffer 2 to hold 512 bytes of data.
h1 dw ? ; File handle 1.
h2 dw ? ; File handle 2.
.code
main proc
mov ax, @data
mov ds, ax
string prompt1
mov si, offset buf1
call OpenEx ; -> AX (DX SI)
mov h1, ax
string prompt2
mov si, offset buf2
call OpenEx ; -> AX (DX SI)
mov h2, ax
ReadFiles:
mov si, offset buf1
mov dx, si
mov bx, h1
call ReadSector ; -> AX (CX DX)
mov bp, ax ; [0,512]
mov di, offset buf2
mov dx, di
mov bx, h2
call ReadSector ; -> AX (CX DX)
mov cx, ax ; [0,512]
cmp bp, cx
jne files_not_equal ; Files have different lengths
jcxz files_equal ; BOTH files ended together
repe cmpsb ; Compare all the bytes ([1,512]) in the buffers
jne files_not_equal ; Found a difference in current sector
cmp ax, 512
je ReadFiles ; Got full buffers, so there could be more
files_equal:
string eq_msg
jmp cleanup
files_not_equal: ; Bytes are not equal. Bail out.
string neq_msg
cleanup: ; --- Close files ----
mov bx, h1
mov ah, 3Eh
int 21h
mov bx, h2
mov ah, 3Eh
int 21h
done:
newline
mov ax, 4C00h
int 21h
; -------------------------
; IN (si) OUT (ax) MOD (dx,si)
OpenEx: ; --- Input the filenames. ---
mov dx, si
getname:
mov ah, 1 ; Read input
int 21h ; -> AL
cmp al, 13 ; EOL?
je end_of_name
mov [si], al ; Store the byte.
inc si
jmp getname
end_of_name: ; --- Open the files using INT 21h ----
mov ax, 3D00h ; Open file in Read-only mode.
mov [si], al ; Make zero-terminated filespec
int 21h
jc open_error
ret
open_error:
mov dx, offset oerr_msg
; --- --- --- --- -
; IN (ax,dx)
err:
push ax ; Preserve error code
mov ah, 9
int 21h
pop dx ; Restore error code
add dl, '0' ; Convert to ASCII
mov ah, 2 ; Print character
int 21h
jmp done
; -------------------------
; IN (bx,dx) OUT (ax) MOD (cx,dx)
ReadSector:
mov cx, 512
mov ah, 3Fh ; Read file.
int 21h ; -> AX CF
mov dx, offset rerr_msg
jc err
ret
; -------------------------
main endp
end main
Read Sectors, not Bytes
This is extremely slow, and especially slow if your disks are the spinning magnetic disks that were used with MS-DOS. Reading one byte from two files at a time would have required the drive spinning the disk, like a vinyl record, until the drive head is over the correct data, then spinning the same direction to the position of the second file, then spinning back to the position of the first file. Never do this with a floppy! It is much faster to read a large amount of data at once, which is why 16-bit systems had disk defragmenters that tried to make all files readable in a single operation.
Always read an entire sector into a buffer and compare the buffers (which you can do several times faster by using wider instructions). Back in the day, a common sector size was 512 bytes, but in the 32-bit era, filesystems began using 4,096-byte clusters (to simplify transferring them into pages of physical RAM). A device driver on a modern OS might use Direct Memory Access or Programmed I/O, which on some implementations would work best with aligned 64-KiB buffers.
Compare File Sizes
Since you’re using the two default FCBs, INT 21h function 23h will retrieve the file sizes. Two files cannot be identical unless they are the same size, and once you know their size, you might be able to load both files into memory in their entirety (although you still might not want to).
Use String Instructions
Once you have two chunks of the file in buffers that DS:SI
and ES:DI
point to, you can compare a byte at a type with CMPSB
or four bytes at a time with CMPSD
. You might load the buffer size into CX
and write a LOOP
that stops when you either reach the end of the buffer or when words of data compare unequal. Alternatively, you might place the buffer size in CX
, CLD
to set the direction flag, use REPE CMPSD
, and check at the end to see whether the canary values were the first bytes that were different. This is a single-instruction inner loop that does not involve branch prediction.
as
on Linux is free, so you could be assigning students to, for example, write 2-D graphics to the framebuffer and count the FPS. \$\endgroup\$