This is the x86 version of a C file assignment (I have included both). We were to convert the C code to assembly. I am rather new at assembly and would really appreciate suggestions and help for optimization and functionality.
This is a C implementation of Dijkstra's recursive algorithm for computing the greatest common divisor of two integers.
#include <unistd.h>
#include <stdlib.h>
#define STDIN 0
#define STDOUT 1
unsigned int getInt(char* string) {
unsigned int result = 0;
char* digit = string;
while (*digit != '\n') digit++; // Obtain the address of
digit--; // the last digit character
while (digit >= string) {
if (*digit == ' ') break;
if (*digit < '0' || *digit > '9') {
char* errorMessage = "Bad Number.\n";
write(STDOUT,errorMessage,12);
exit(0);
}
// use the MUL (dword) instruction here (unsigned multiply)
// be careful to understand its operands and results
result += (*digit - '0') * digitValue;
digitValue *= 10;
digit--; // walk backwards from least
} // significant to most
return result;
}
void makeDecimal(unsigned int n) {
// use the DIV (dword) instruction here (unsigned divide)
// be careful to understand its operands and results
unsigned int remainder = n % 10;
unsigned int quotient = n / 10;
if (quotient > 0) makeDecimal(quotient); // notice recursion!
char digit = remainder + '0';
write(STDOUT,&digit,1);
}
int readNumber() {
char data[20];
char* prompt = "Enter a positive integer: ";
write(STDOUT,prompt,26);
read(STDIN,data,20);
return getInt(data);
}
unsigned int gcd(unsigned int n, unsigned int m) {
if (n > m) {
return gcd(n - m, m); // recursion
} else if (n < m) {
return gcd(n, m - n); // recursion
} else return n; // base case
}
int main() {
char newLineChar = '\n';
unsigned int a, b, answer;
a = readNumber();
b = readNumber();
answer = gcd(a,b);
char* message = "Greatest common divisor = ";
write(STDOUT,message,26);
makeDecimal(answer);
write(STDOUT,&newLineChar,1);
exit(0);
}
This is the assembly version of the C program. The task is to implement this same program entirely in 32-bit x86 assembly language (for assembly by NASM and execution under Linux).
SECTION .data
posInt: db 'First num',10
posIntL equ $-posInt
badNum: db 'Bad Number.',10
badNumL equ $-badNumL
gcdiv: db 'GCD = ',10 ; greatest common divisor
gcdL equ $-gcdL
answ: db " ",10
answL equ $-answ
testP: db 'It got here: ',10
testLen equ $-testP
lVal: equ 48
hVal: equ 57
SECTION .bss
result: resb 8
chars: equ 20
inbuf1: resb chars + 1 ;space for 22 bytes
SECTION .text
global gcd
global makeDec
global getInt
global numRead
global main
main:
call numRead
mov ebx, eax
call numRead
mov ecx, eax
push ebx
push ecx
call gcd
mov esi, eax
push esi
call makeDec
mov eax, 1
mov ebx, 0
int 80H
numRead:
push ebp
mov ebp, esp
push ebx
push ecx
push edx
nop
mov eax, 4
mov ebx, 1
mov ecx, posInt
mov edx, posIntL
int 80H
mov eax, 3
mov ebx, 0
mov ecx, inbuf1
mov edx, chars
int 80H
push inbuf1
call getInt
add esp, 4
pop edx
pop ecx
pop ebx
mov esp, ebp
pop ebp
ret
getInt:
push ebp
mov ebp, esp
push ebx
push ecx
push edx
push edi
mov edi, [ebp+8]
mov ecx, edi
mov ebx, 1
mov eax, 0
findLast:
mov dl, [ecx]
cmp dl, 10
jne increment
dec ecx
jmp validOrBad
increment:
inc ecx
jmp findLast
validOrBad:
cmp ecx, edi
jb getIntReturn
mov dl, [ecx]
cmp dl, 32
je getIntReturn
cmp dl, lowVal
jl badNumber
cmp dl, highVal
jg badNumber
validNum:
sub dl, 48
imul dx, bx
add al, dl
imul bx, 10
dec ecx
jmp validOrBad
badNumber:
mov eax, 4
mov ebx, 1
mov ecx, badNum
mov edx, badNumL
int 80H
mov eax, 1
mov ebx, 0
int 80H
getIntReturn:
pop edi
pop edx
pop ecx
pop ebx
mov esp, ebp
pop ebp
ret
makeDecimal:
nop
push ebp
mov ebp, esp
push ebx
push ecx
push edx
mov ebx, 10
xor edx, edx
div ebx
cmp eax, 0
jle notEqual
makeDecimalRECURSION:
push eax
call makeDecimal
add esp, 8
notEqual:
add edx, lowVal
mov [result], dl
mov eax, 4
mov ebx, 1
mov ecx, gcdiv
mov edx, gcdL
int 80H
mov eax, 4
mov ebx, 1
mov ecx, result
mov edx, 1
int 80H
mov eax, 4
mov ebx, 1
mov ecx, answerSpace
mov edx, spaceLen
int 80H
pop edx
pop ecx
pop ebx
mov esp, ebp
pop ebp
ret
gcd:
nop
push ebp
mov ebp, esp
push ebx
push ecx
mov ebx, [ebp+8] ;var n
mov ecx, [ebp+12] ;var m
cmp ebx, ecx
jl LESS
jg GREATER
je FINISH
LESS:
sub ecx, ebx
push ebx
push ecx
call gcd
pop ebx
pop ecx
mov esp, ebp
pop ebp
ret
GREATER:
sub ebx, ecx
push ebx
push ecx
call gcd
pop ecx
pop ebx
mov esp, ebp
pop ebp
ret
FINISH:
mov eax, ebx
pop ecx
pop ebx
mov esp, ebp
pop ebp
ret
2 Answers 2
My thoughts (in no particular order):
- Short on comments. Assembler in particular benefits from comments.
- Uses a very conservative style (params passed on stack, all regs preserved, maintain stack frames). For max performance (often the reason to use asm vs high level languages), there are alternatives (pass params in regs, omit stack frame, assume certain regs clobbered during 'call') that use less memory and give better performance.
- Maybe omit
jl LESS
, since that's the default. - What's with
nop
?
Edit: I'm expanding on #2 since it was so terse.
I'm probably harping on this too much, but I think it's an important point to understand about asm programming.
There are rules when programming a computer. Some are enforced by the CPU (mustn't divide by 0, no NULL references, etc). Some are defined by the assembler (accidentally typing mvo eax, 0
instead of mov eax, 0
).
And some are defined by the high level languages (like C). If one routine written in C is going to call another routine written in C, they must agree on a set of rules. Where is the calling routine going to put the parameters so that the called routine can find them? What registers must the called routine preserve? All? Some? None? Where is the return value from the called routine going to be?
But whatever decisions C made are just that: Decisions made by the people who designed the C language. Fortran might use different rules. Pascal, java, COBOL might all do something different.
But asm, ahh...
If your asm code was going to call a C function (yes, that can be done), then you would have to follow the C rules about where to put the parameters and where the C routine will return its result. But when one asm routine calls another, it can use any of the high level language rules (making it flexible enough to work with any language), or use none of them and make up its own.
Which brings us back to your question about removing the stack as a way to pass parameters. The code below doesn't really follow any industry standard rules. But 'main' and 'gcd' are both written according to the same rules. And since they agree, the code works as intended. And the resulting code is (a bit) more efficient.
(I'm going to base this on JS1's code, but you can see where this happens in yours)
Let's start with the existing code. You read values from the stack:
mov eax, [ebp+8] ;var n
mov edx, [ebp+12] ;var m
But why do you read it from the stack? Well, the reason you read it from the stack is that when you call the function, that's where you put the values:
push eax
push edx
call gcd
But why do you push the values before you call the function? Well, you do it because that's where the function is going to read them. In other words, we do it because we need to, and we need to because we do it. It's just a decision you have made about how you will pass the parameters.
So what's the alternative?
What if you just change the rule so that instead of pushing the parameters before you made the call, you just always made sure that eax and edx contain the values you want before you make the call? Then, instead of loading eax and edx from the stack, they are already there.
It's easy to forget and think of registers like variables that go out of scope when you call a function. But that's not how registers work. There is only 1 edx.
I haven't run this (I'm not on Linux), but how about something like this:
gcd:
cmp eax, edx
jg GREATER
je FINISH
sub edx, eax ; LESS case
jmp RECURSE
GREATER:
sub eax, edx
RECURSE:
call gcd ; args popped off by FINISH
FINISH: ; return value must be in eax
ret
Since we are passing the parameters in registers, the push statements before the call to gcd are no longer needed. And since we aren't following cdecl anymore, I also got rid of the stack frame stuff (the ebp stuff).
So what happens: We assume that eax and edx are set before the routine starts (more on that below). We compare them, subtract as necessary and (possibly) call gcd again. And if the rule is that we must have the values in eax and edx before we call gcd, why, they're already there!
So, we do a lot less pushing/popping, have fewer instructions, use less memory, I'm pretty sure this is even gluten free.
(Q: As a thought exercise: what happens if you change the "call gcd" by RECURSE to "jmp gcd"? See answer below.)
We still need to change the code in main that calls gcd for the first time. It needs to make sure the parameters are where the new rule says they are:
call numRead
mov edx, eax
call numRead
; value is already in eax
call gcd
And here is why having rules is so important.
I'm moving the return value from the first numRead call into edx. Then I call numRead again. If numRead treated edx as volatile (the way gcd does), my first number would get overwritten during the second call to numRead (there is only 1 edx register no matter where you are in the program).
And that's part of why I think comments in asm are so important. Having a comment block at the top of each function that describes the purpose of the function, lists what the inputs are (and where they are), and what gets preserved and what gets clobbered can save you a lot of grief.
That's also the benefit of following common standards. If all your asm follows one industry-standard convention (cdecl, stdcall, fastcall, x64, etc), then you don't even have to read the comments to know what the rules for that function are. It may not always be as efficient, but if your project has a lot of code, it is much easier to maintain.
In this case since I know that numRead preserves edx, I take advantage of that fact to keep things simple in main.
Hope this clears up what I was talking about.
(A: Your teacher probably gives you a failing grade, because the code is no longer recursive. But it runs even faster, and uses even less memory).
-
\$\begingroup\$ Can that be done while maintaining the recursion element? \$\endgroup\$JaeJae55– JaeJae552016年06月29日 17:05:50 +00:00Commented Jun 29, 2016 at 17:05
-
\$\begingroup\$ Yes. Most of this is covered by JS1's answer under 'volatile registers.' \$\endgroup\$David Wohlferd– David Wohlferd2016年06月29日 21:17:54 +00:00Commented Jun 29, 2016 at 21:17
-
\$\begingroup\$ I did actually omit the nop and jl LESS from the code. I tried researching online to see how to do recursion without the stack element but alas to no avail. Could you provide an example and/or explanation on how this would work. \$\endgroup\$JaeJae55– JaeJae552016年06月29日 23:23:39 +00:00Commented Jun 29, 2016 at 23:23
-
\$\begingroup\$ That was a fantastic explanation and clears a lot up! My purpose was not entirely based on a course grade but practicality and functionality while processing as optimal as possible. and your explanation takes care of most of those aspirations if not all. Thanks! \$\endgroup\$JaeJae55– JaeJae552016年06月30日 04:55:51 +00:00Commented Jun 30, 2016 at 4:55
-
1\$\begingroup\$ I'm glad you found it useful. Like JS1, I assumed recursion was an assignment requirement. JS1 is quite correct that recursion is almost always a bad idea. \$\endgroup\$David Wohlferd– David Wohlferd2016年06月30日 05:07:23 +00:00Commented Jun 30, 2016 at 5:07
Missing stack fixup
In main()
, you push arguments to gcd()
and makeDec()
but don't pop them or add back to the stack pointer. If you actually returned from main
, your program would crash.
Use volatile registers, not preserved ones
First, I'm assuming you are following the cdecl calling convention and the Intel ABI. Under the Intel ABI, the registers eax
, ecx
, and edx
are volatile registers, which means that a function can trash their values and not preserve them. Registers such as ebx
and esi
are preserved registers, which means a function must preserve their value.
In gcd()
, you have chosen to use ebx
and ecx
, and have attempted to preserve them (I think).
In both the
LESS
case and theGREATER
case, you restore the stack bymov esp, ebp
, which means that you failed to restore theebx
value that you pushed at the very top of the function.If you used
eax
oredx
instead ofebx
, you could delete the two pushes at the beginning and the two pops in theFINISH
case, because you wouldn't need to preserve those two registers.
Miscellaneous
- In
gcd()
, why is the first instruction a nop? - I think that
main()
passed the arguments in the wrong order, but sincegcd()
is symmetrical, it doesn't matter. - You could merge the three return paths of
gcd()
into one, since they are all almost the same.
Rewrite of gcd
Here's how I would have written gcd()
, given the restrictions:
- It has to be recursive
- No tail call optimizations are allowed
- Cdecl calling conventon must be followed
The code:
gcd:
push ebp
mov ebp, esp
mov eax, [ebp+8] ;var n
mov edx, [ebp+12] ;var m
cmp eax, edx
jg GREATER
je FINISH
sub edx, eax ; LESS case
jmp RECURSE
GREATER:
sub eax, edx
RECURSE:
push eax
push edx
call gcd ; args popped off by FINISH
FINISH: ; return value must be in eax
mov esp, ebp ; restore sp (also pops off any args pushed)
pop ebp
ret
Why use cdecl?
There was some discussion in the comments so I'm adding this section to clarify
I assumed you were using cdecl because your code appeared to follow that convention. The main reason for using cdecl is so that your assembly code can call and be called by C code (or other code that follows cdecl). That way, everybody agrees on where to place function arguments (e.g. on the stack instead of through registers), where the return value should go (e.g. eax), and which registers can be trashed by a function (e.g. eax ecx edx).
However, cdecl isn't the only calling convention for x86. Furthermore, you don't even need any calling convention if you don't plan on linking with external code. If you were trying to make your code as fast and small as possible, you would dispense with calling conventions and just do whatever was optimal.
For example, if you threw out calling convention, then you could change gcd()
to not even use the stack. You would pass arguments through registers instead.
Recursion a bad idea
Furthermore, using recursion for gcd()
is a bad idea. You could easily overflow your stack with gcd(MAX_INT, 1)
. Both the C and assembly versions of gcd()
should be changed to use a loop instead of recursing.
However, I assumed that you were required to translate what you were given so I didn't emphasize this point before.
-
\$\begingroup\$ Thank you!, that was quite an improvement! I didn't know that those registers were volatile (I'm new at assembly). I am a little confused by what you mean as to merging the return paths. \$\endgroup\$JaeJae55– JaeJae552016年06月29日 17:27:02 +00:00Commented Jun 29, 2016 at 17:27
-
\$\begingroup\$ @JaeJae55 What I mean is that the three different paths all ended with identical code "pop pop mov pop ret". So instead of having 3 copies of that, just have the one copy and make the other 2 jump to the one copy. If you look at my rewrite, there's only one exit path at FINISH with "mov pop ret", and the other two paths jump or fall through to it. \$\endgroup\$JS1– JS12016年06月29日 17:33:33 +00:00Commented Jun 29, 2016 at 17:33
-
\$\begingroup\$ Oh! In the getintReturn, readNum and notEqual function. So You are saying to create a new function and place a non conditional jmp for each of those functions in place of the code. which is the: pop edx; pop ecx; pop ebx; mov esp,ebp; pop ebp; and ret. My question is should the new function have the "ret" or would that effect the overall execution or rather "follow through" of the code. \$\endgroup\$JaeJae55– JaeJae552016年06月29日 17:50:40 +00:00Commented Jun 29, 2016 at 17:50
-
1\$\begingroup\$ 'those registers are volatile' if you decide that they are. There are conventions that are followed that make interfacing between (say) C and assembler possible. In order for the two to work together they must agree on how things work. But this is pure asm. JS1 uses cdecl in this example (presumably because you did), but there's no law that requires this. Dropping this requirement makes the code even smaller/faster. While using known conventions can make the code easier to read/maintain, one of the features (and also one of the dangers) of asm is you get to decide which, if any, to follow. \$\endgroup\$David Wohlferd– David Wohlferd2016年06月29日 21:33:51 +00:00Commented Jun 29, 2016 at 21:33
-
\$\begingroup\$ @DavidWohlferd I'm not sure I follow, "Volatile if you decide they are" . \$\endgroup\$JaeJae55– JaeJae552016年06月29日 23:21:44 +00:00Commented Jun 29, 2016 at 23:21
result
digit-string? And has 3 differentwrite
system calls, including one from ananswerSpace
buffer which isn't defined anywhere. \$\endgroup\$