8
\$\begingroup\$

Here's my assembler for the HACK assembly language, part of the Nand2Tetris course. I'd really appreciate any comments/criticism/help!

If you want to know what input and output files should look like, you can check out the test data in the /tests/ folder on GitHub.

Assembler.c:

/**
 * file: assembler.c
 *
 * usage of assembler for the hack assembly language.
 *
 */
#include "assemble.h"
int main(int argc, char* argv[])
{
 // check input is correct
 if (argc != 3)
 {
 fprintf(stderr, "Usage: assembler source output\n");
 return 1;
 }
 // open source file
 FILE* source = fopen(argv[1], "rb");
 if (source == NULL)
 {
 fprintf(stderr, "Error: cannot open source file %s\n", argv[1]);
 return 1;
 }
 // open output file
 FILE* output = fopen(argv[2], "wb");
 if (output == NULL)
 {
 fprintf(stderr, "Error: cannot open output file %s\n", argv[2]);
 fclose(source);
 return 1;
 }
 if (assemble(source, output) == false)
 {
 fprintf(stderr, "Quitting with error.\n");
 return 1;
 }
 return 0;
}

Assemble.h:

/**
 * file: assemble.h
 *
 * assembler for the hack assembly language.
 *
 */
#include <stdio.h>
#include <stdbool.h>
#define MAX_A 32767
#define COMP_TABLE_SIZE 28
#define JUMP_TABLE_SIZE 7
#define MAX_SYMBOL_SIZE 10
// node for symbol and its translation
typedef struct symNode
{
 char symbol[MAX_SYMBOL_SIZE];
 char translation[17];
 struct symNode* next;
}
symNode;
// node for comp code and its translation
typedef struct compNode 
{
 char entry[4];
 char translation[8];
}
compNode;
// node for jump code and its translation
typedef struct jumpNode 
{
 char entry[4];
 char translation[4];
}
jumpNode;
/**
 * assemble: translates source assembly file into machine code.
 * returns true on success, else false;
 */
bool assemble(FILE* source, FILE* output);
/**
 * addSym: add the symbol-translation pair to the start of the linked list beginning with head.
 * returns true on success, else false.
 */
bool addSym(const char* symbol, const char* translation, int line);
/**
 * buildTables: builds the table for comp/jump codes and their translations.
 * returns true on success, else false.
 */
bool buildTables(void);
/**
 * clearTables: frees the tables of comp/jump codes and their translations.
 */
void clearTables(void);
/** 
 * decodeA: reads in an A instruction from source, and outputs the a-instruction to out, converted to binary.
 * returns source line number, or -1 if error.
*/
int decodeA(FILE* source, FILE* output, int line);
/**
 * writeComp: translates comp and outputs it to output.
 * returns true on success, else false.
 */
bool writeComp(char* comp, FILE* output);
/**
 * writeJump: translates jump and outputs it to output.
 * returns true on success, else false.
 */
bool writeJump(char* jump, FILE* output);
/** 
 * decodeC: reads in a C instruction from source (first char is c), and outputs the C-instruction to out, converted to binary.
 * returns line number, or -1 on error.
*/
int decodeC(char c, FILE* source, FILE* output, int line);
/**
 * loadLabels: populates the symbol dictionary with all of the labels in the file.
 * returns true on success, else false;
 */
bool loadLabels(FILE* source);

Assemble.c:

/**
 * file: assemble.c
 *
 * assembler for the hack assembly language.
 *
 * usage: assembler source output
 */
#include "assemble.h"
#include <stdio.h>
#include <stdbool.h> // bool type
#include <ctype.h> // isspace(), isdigit()
#include <stdlib.h> // atoi()
#include <string.h> // strcpy(), strcmp(), strchr()
// head for symbol dictionary linked list
symNode* symHead;
// table for comp codes and their translations
compNode* compDict[COMP_TABLE_SIZE];
// table for jump codes and their translations
jumpNode* jumpDict[JUMP_TABLE_SIZE];
const char* compCodes[COMP_TABLE_SIZE] = {"0", "1", "-1", "D", "A", "!D", "!A", "-D", "-A",
 "D+1", "A+1", "D-1", "A-1", "D+A", "D-A", "A-D",
 "D&A", "D|A", "M", "!M", "-M", "M+1", "M-1", "D+M",
 "D-M", "M-D", "D&M", "D|M"};
const char* compTranslations[COMP_TABLE_SIZE] = {"0101010", "0111111", "0111010", "0001100",
 "0110000", "0001101", "0110001", "0001111",
 "0110011", "0011111", "0110111", "0001110",
 "0110010", "0000010", "0010011", "0010011",
 "0000000", "0010101", "1110000", "1110001",
 "1110011", "1110111", "1110010", "1000010",
 "1010011", "1000111", "1000000", "1010101"};
const char* jumpCodes[JUMP_TABLE_SIZE] = {"JGT", "JEQ", "JGE", "JLT", "JNE", "JLE", "JMP"};
const char* jumpTranslations[JUMP_TABLE_SIZE] = {"001", "010", "011", "100", "101", "110", "111"};
/**
 * addSym: add the symbol-translation pair to the start of the linked list beginning with head.
 * returns true on success, else false;
 */
bool addSym(const char* symbol, const char* translation, int line)
{
 // construct the new node
 symNode* temp = malloc(sizeof(symNode));
 if (temp == NULL)
 {
 fprintf(stderr, "Error (line %d): cannot malloc new symbol node.\n", line);
 return false;
 }
 strcpy(temp->symbol, symbol);
 strcpy(temp->translation, translation);
 if (symHead != NULL) // list not empty
 {
 temp->next = symHead;
 }
 symHead = temp;
 return true;
}
/**
 * buildTables: builds the table for comp/jump codes and their translations.
 */
bool buildTables(void)
{
 int i;
 // build comp table
 for (i = 0; i < COMP_TABLE_SIZE; i++)
 {
 compNode* temp = malloc(sizeof(compNode));
 if (temp == NULL)
 {
 fprintf(stderr, "Error: cannot create comp table\n");
 return false;
 }
 strcpy(temp->entry, compCodes[i]);
 strcpy(temp->translation, compTranslations[i]);
 compDict[i] = temp;
 }
 // build jump table
 for (i = 0; i < JUMP_TABLE_SIZE; i++)
 {
 jumpNode* temp = malloc(sizeof(jumpNode));
 if (temp == NULL)
 {
 fprintf(stderr, "Error: cannot create jump table\n");
 return false;
 }
 strcpy(temp->entry, jumpCodes[i]);
 strcpy(temp->translation, jumpTranslations[i]);
 jumpDict[i] = temp;
 }
 // load default register symbols into symbol table
 int v;
 int k;
 int j;
 for (i = 0; i < 16; i++)
 {
 char* tempSym = malloc(4);
 if (tempSym == NULL)
 {
 fprintf(stderr, "Error: cannot create register table\n");
 return false;
 }
 char* tempTran = malloc(17);
 if (tempTran == NULL)
 {
 fprintf(stderr, "Error: cannot create register table\n");
 return false;
 }
 tempSym[0] = 'R';
 sprintf(tempSym+1, "%d", i);
 v = i;
 k = 0;
 for (j = 15; j >= 0; j--, k++)
 {
 tempTran[k] = '0' + ((v >> j) & 1); 
 }
 tempTran[k] = '0円';
 if (addSym(tempSym, tempTran, 0) == false)
 {
 fprintf(stderr, "Error: cannot create register table\n");
 return false;
 }
 }
 return true;
}
/**
 * clearTables: frees the tables of comp/jump codes and their translations.
 */
void clearTables(void)
{
 // clear computations table
 int i;
 for (i = 0; i < COMP_TABLE_SIZE; i++)
 {
 free(compDict[i++]);
 }
 // clear jump table
 for (i = 0; i < JUMP_TABLE_SIZE; i++)
 {
 free(jumpDict[i++]);
 }
 // clear symbol table
 symNode* pos = symHead;
 symNode* next;
 while (pos != NULL)
 {
 next = pos->next;
 free(pos);
 pos = next;
 }
}
/** 
 * decodeA: reads in an A instruction from source, and outputs the a-instruction to out, converted to binary.
 * returns source line number, or -1 if error.
*/
int decodeA(FILE* source, FILE* output, int line)
{
 static int varNum = 16;
 char* instruction = malloc(MAX_SYMBOL_SIZE + 1); //holds the number in the @instruction
 if (instruction == NULL)
 {
 fprintf(stderr, "Error (decodeA): cannot malloc instruction\n");
 return -1;
 }
 // read in the @ instruction
 int i = 0;
 char c;
 if ((c = fgetc(source)) && !isdigit(c)) // symbol
 {
 do
 {
 if (i > MAX_SYMBOL_SIZE)
 {
 fprintf(stderr, "Error (line %d): symbol too large (max length %d chars)\n", line, MAX_SYMBOL_SIZE);
 return -1;
 }
 instruction[i++] = c;
 } while ((c = fgetc(source)) && !isspace(c) && c != EOF);
 if (i == 0)
 {
 fprintf(stderr, "Error (line %d): expected value for A-instruction\n", line);
 return -1;
 }
 instruction[i] = '0円';
 // search table for instruction
 symNode* pos;
 for (pos = symHead; pos != NULL; pos = pos->next)
 {
 if (strcmp(instruction, pos->symbol) == 0)
 {
 fprintf(output, pos->translation);
 break;
 }
 }
 if (pos == NULL) // symbol not in table: add it!
 {
 char* tempTran = malloc(17);
 int k = 0;
 int j;
 int v = varNum;
 for (j = 15; j >= 0; j--, k++)
 {
 tempTran[k] = '0' + ((v >> j) & 1); 
 }
 tempTran[k] = '0円';
 addSym(instruction, tempTran, 0);
 varNum++;
 // output symbol
 fprintf(output, tempTran);
 fputc('\n', output);
 return line;
 }
 }
 if (isdigit(c)) // non-symbolic a-instruction
 {
 do
 {
 if (i > 4)
 {
 fprintf(stderr, "Error (line %d): integer too large\n", line);
 return -1;
 }
 instruction[i++] = c;
 } while ((c = fgetc(source)) && isdigit(c));
 if (i == 0)
 {
 fprintf(stderr, "Error (line %d): expected value for A-instruction\n", line);
 return -1;
 }
 instruction[i] = '0円';
 // convert the @ instruction to int
 int v = atoi(instruction);
 free(instruction);
 if (v > MAX_A || v < 0) 
 {
 fprintf(stderr, "Error (line %d): %d is an invalid integer\n", line, v);
 return -1;
 }
 // output the a-instruction converted to binary
 for (i = 15; i >= 0; i--)
 {
 fputc('0' + ((v >> i) & 1), output);
 }
 }
 // carry on reading until newline
 while (c != '\n' && c != EOF)
 {
 c = fgetc(source);
 }
 if (c == '\n')
 {
 fputc('\n', output);
 line++;
 }
 return line;
}
/**
 * writeComp: translates comp and outputs it to output.
 * returns true on success, else false.
 */
bool writeComp(char* comp, FILE* output)
{
 // search computations for the comp
 int i;
 for (i = 0; i < COMP_TABLE_SIZE; i++)
 {
 if (strcmp(compDict[i]->entry, comp) == 0)
 {
 // found
 fprintf(output, compDict[i]->translation);
 return true;
 }
 }
 // not found
 printf("%s not found\n", comp);
 return false;
}
/**
 * writeJump: translates jump and outputs it to output.
 * returns true on success, else false.
 */
bool writeJump(char* jump, FILE* output)
{
 // search jump table for the jump
 int i;
 for (i = 0; i < JUMP_TABLE_SIZE; i++)
 {
 if (strcmp(jumpDict[i]->entry, jump) == 0)
 {
 // found
 fprintf(output, jumpDict[i]->translation);
 return true;
 }
 }
 // not found
 return false;
}
/** 
 * decodeC: reads in a C instruction from source (first char is c), and outputs the C-instruction to out, converted to binary.
 * returns line number, or -1 on error.
*/
int decodeC(char c, FILE* source, FILE* output, int line)
{
 // C-instructions have three parts: dest, comp, and jump.
 char* dest = malloc(4);
 char* comp = malloc(4);
 char* jump = malloc(4);
 if (dest == NULL || dest == NULL || jump == NULL)
 {
 fprintf(stderr, "Error (line: %d): cannot malloc\n", line);
 }
 char* buffer = malloc(4);
 int i = 0;
 bool destIn = false;
 bool compIn = false;
 bool jumpIn = false;
 do
 {
 if (i > 3)
 {
 fprintf(stderr, "Error (line: %d): invalid instruction\n", line);
 }
 else if (c == '=') // buffer is dest
 {
 strcpy(dest, buffer);
 dest[i] = '0円';
 destIn = true;
 i = 0;
 }
 else if ((((c == '\n') || (c == '/') || c == EOF) && !compIn) || c == ';') // buffer is comp
 {
 strcpy(comp, buffer);
 comp[i] = '0円';
 compIn = true;
 i = 0;
 }
 else if (((c == '\n') || (c == '/') || c == EOF) && compIn) // buffer is jump
 {
 strcpy(jump, buffer);
 jump[i] = '0円';
 jumpIn = true;
 i = 0;
 }
 else if (!isspace(c) && c != '/')
 {
 buffer[i++] = c;
 }
 if (c == '\n' || c == '/' || c == EOF)
 {
 break;
 }
 }
 while (c = fgetc(source));
 // write C-instruction code (111)
 fprintf(output, "111");
 if (compIn) 
 {
 if (writeComp(comp, output) == false)
 {
 fprintf(stderr, "Error (line: %d): cannot translate '%s'\n", line, comp);
 return -1;
 }
 }
 else
 {
 // write default comp code
 fprintf(output, "111101010");
 }
 if (destIn)
 {
 if (strchr(dest, 'A') != NULL)
 {
 fputc('1', output);
 }
 else
 {
 fputc('0', output);
 }
 if (strchr(dest, 'D') != NULL)
 {
 fputc('1', output);
 }
 else
 {
 fputc('0', output);
 }
 if (strchr(dest, 'M') != NULL)
 {
 fputc('1', output);
 }
 else
 {
 fputc('0', output);
 }
 }
 else
 {
 // write default dest
 fprintf(output, "000");
 }
 if (jumpIn)
 {
 if (writeJump(jump, output) == false)
 {
 fprintf(stderr, "Error (line: %d): cannot translate jump '%s'\n", line, jump);
 return -1;
 }
 }
 else
 {
 // write default jump
 fprintf(output, "000");
 }
 free(dest);
 free(comp);
 free(jump);
 free(buffer);
 fputc('\n', output);
 line++;
 return line;
}
/**
 * loadLabels: populates the symbol dictionary with all of the labels in the file.
 * returns true on success, else false.
 */
bool loadLabels(FILE* source)
{
 char* tempLabel;
 char* tempTran;
 int line = 0;
 bool definingLabel = false; // are we defining a label?
 bool comment = false; // are we in a comment?
 bool content = false; // is there content on the current line?
 bool addLabel = false; // should we add the current line to the label tag?
 int numLabels = 0;
 char c;
 int i = 0; // label pos
 while ((c = fgetc(source)) != EOF)
 {
 if (c == '/')
 {
 comment = true;
 }
 else if (c == '(' && !comment) // new label
 {
 if (definingLabel)
 {
 fprintf(stderr, "Error (line %d): cannot enter '(' in label name\n", line);
 return false;
 }
 definingLabel = true;
 tempLabel = malloc(MAX_SYMBOL_SIZE + 1);
 if (tempLabel == NULL)
 {
 fprintf(stderr, "Error (line %d): cannot malloc tempLabel\n", line);
 return false;
 }
 }
 else if (c == ')' && !comment)
 {
 if (!definingLabel)
 {
 fprintf(stderr, "Error (line %d): cannot enter ')' outside label\n", line);
 return false;
 } 
 definingLabel = false;
 // add to dict
 tempLabel[i] = '0円';
 i = 0;
 addSym(tempLabel, "", line);
 addLabel = true;
 numLabels++;
 }
 else if (definingLabel && !comment)
 {
 if (isspace(c))
 {
 fprintf(stderr, "Error (line %d): cannot enter whitespace in label name\n", line);
 return false;
 }
 else
 {
 tempLabel[i++] = c;
 }
 }
 if (c == '\n')
 {
 comment = false;
 if (content)
 {
 line++;
 }
 content = false;
 }
 else if (!isspace(c) && !comment && !definingLabel && c != ')')
 {
 content = true;
 if (addLabel)
 {
 tempTran = malloc(17);
 int v = line;
 int k = 0;
 int j;
 for (j = 15; j >= 0; j--, k++)
 {
 tempTran[k] = '0' + ((v >> j) & 1); 
 }
 tempTran[k] = '0円';
 for (symNode* pos = symHead; numLabels > 0; numLabels--)
 {
 strcpy(pos->translation, tempTran);
 pos = pos->next;
 }
 addLabel = false;
 }
 }
 }
 if (addLabel)
 {
 tempTran = malloc(17);
 int v = line;
 int k = 0;
 int j;
 for (j = 15; j >= 0; j--, k++)
 {
 tempTran[k] = '0' + ((v >> j) & 1); 
 }
 tempTran[k] = '0円';
 for (symNode* pos = symHead; numLabels > 0; numLabels--)
 {
 strcpy(pos->translation, tempTran);
 pos = pos->next;
 }
 }
 // rewind the file
 fseek(source, 0, SEEK_SET);
 return true;
}
bool assemble(FILE* source, FILE* output)
{
 // build translation tables
 if (buildTables() == false)
 {
 fprintf(stderr, "Terminating program due to error\n");
 return 1;
 }
 if (loadLabels(source) == false)
 {
 fprintf(stderr, "Terminating program due to error\n"); 
 return 1;
 }
 // main read loop
 char c;
 bool comment = false; // are we in a comment?
 bool label = false; // are we in a label?
 int line = 1; // source line number
 while ((c = fgetc(source)) != EOF)
 {
 if (c == '/')
 {
 comment = true;
 }
 else if (c == '\n')
 {
 line++;
 comment = false; // newline breaks comments
 }
 else if (c == '(')
 {
 label = true;
 }
 else if (c == ')')
 {
 label = false;
 }
 else if (label)
 {
 continue;
 }
 else if (isspace(c))
 {
 continue;
 }
 else if (comment) 
 {
 continue; // skip comments
 }
 else if (c == '@') // A-INSTRUCTION
 {
 line = decodeA(source, output, line);
 }
 else // C-INSTRUCTION (or invalid)
 {
 line = decodeC(c, source, output, line);
 }
 if (line == -1)
 {
 fprintf(stderr, "Terminating assembly due to error\n");
 return false;
 }
 }
 // clear translation tables
 clearTables();
 fclose(source);
 fclose(output);
 printf("Assembly successful\n");
 return true;
}
200_success
145k22 gold badges190 silver badges478 bronze badges
asked Sep 14, 2015 at 20:15
\$\endgroup\$
1

1 Answer 1

1
\$\begingroup\$

Linear searches

I noticed that there are several places where you search through your various tables using linear searches. It would be easy to sort your tables and add a search function that uses a binary search instead.

Currently your symHead points to a linked list of symbols, but you could modify that to be an array instead (just resize as needed). Then you could use a binary search on it.

Trie

Depending on your speed requirements, you could even go a step further and build a trie for your symbol table. Searches and symbol additions would be faster using a trie than using a sorted array.

However, you should only do this if you expect large symbol tables and if execution time is a major concern. For small input files, I doubt that using a trie will make a noticeable difference.

answered Sep 17, 2015 at 20:54
\$\endgroup\$
1
  • \$\begingroup\$ Thanks! I was wondering whether or not to sort -- I wasn't sure whether it was worth O(n lg n) time to sort the array every time you add a symbol just to gain O(lg n) speed on binary search rather than O(n). I guess it is worth it, since you add symbols far less than you search them. I'll get on it! :D \$\endgroup\$ Commented Sep 19, 2015 at 16:44

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.