I wrote brainfuck interpreter in order to prepare myself for a C job. I try to write the code as clear and as defensively as I can. Can somebody take a look at the code and give me some hints for improvement?
// tape.h
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
typedef struct Tape {
long pointer;
long capacity;
unsigned short *data;
} Tape;
void initializeTape(Tape *tape);
void growTape(Tape *tape);
void incrementPointer(Tape *tape);
void decrementPointer(Tape *tape);
void incrementValue(Tape *tape);
void decrementValue(Tape *tape);
void read(Tape *tape);
void get(Tape *tape);
void freeTape(Tape *tape);
long interpret(Tape *tape, const char *source_code, long source_code_size, long position);
The implementation of tape.h
// tape.c
#include "tape.h"
void initializeTape(Tape *tape) {
tape->pointer = 0;
tape->capacity = 8;
tape->data = (unsigned short *) calloc( tape->capacity, sizeof(unsigned short));
if (tape->data == NULL) {
fprintf(stderr, "Out of memory error.\n");
exit(1);
}
}
void growTape(Tape *tape) {
tape->capacity *= 2;
tape->data = (unsigned short *) realloc(tape->data, tape->capacity);
if (tape->data == NULL) {
fprintf(stderr, "Out of memory error.\n");
exit(1);
}
}
void incrementPointer(Tape *tape) {
if (tape->pointer >= tape->capacity) {
growTape(tape);
}
tape->pointer++;
}
void decrementPointer(Tape *tape) {
if (tape->pointer == 0) {
fprintf(stderr, "Syntax error. Negative pointer detected.");
exit(1);
}
tape->pointer--;
}
void incrementValue(Tape *tape) {
tape->data[tape->pointer]++;
}
void decrementValue(Tape *tape) {
tape->data[tape->pointer]--;
}
void read(Tape *tape) {
putchar(tape->data[tape->pointer]);
}
void get(Tape *tape) {
tape->data[tape->pointer] = (char) getchar();
}
void freeTape(Tape *tape) {
free(tape->data);
tape->pointer = 0;
tape->capacity = 0;
}
long interpret(Tape *tape, const char *source_code, long source_code_size, long position) {
char c = source_code[position];
switch (c) {
case '>':
incrementPointer(tape);
break;
case '<':
decrementPointer(tape);
break;
case '+':
incrementValue(tape);
break;
case '-':
decrementValue(tape);
break;
case '.':
read(tape);
break;
case ',':
get(tape);
break;
case '[':
if (tape->data[tape->pointer] == (char) 0) {
int stack = 1;
long j = position + 1;
for (; j < source_code_size && stack > 0 && tape->pointer < source_code_size; j++) {
char _c = source_code[j];
if (_c == '[') {
++stack;
} else if (_c == ']') {
--stack;
}
}
if (stack != 0) {
fprintf(stderr, "Syntax error. Missing closing ].\n");
exit(1);
} else {
position = j + 1;
}
}
break;
case ']':
if (tape->data[tape->pointer] != (char) 0) {
int stack = 1;
long j = position - 1;
for (; j >= 0 && stack > 0 && tape->pointer >= 0; j--) {
char _c = source_code[j];
if (_c == '[') {
--stack;
} else if (_c == ']') {
++stack;
}
}
if (stack != 0) {
fprintf(stderr, "Syntax error. Missing opening [.\n");
exit(1);
} else {
position = j + 1;
}
}
break;
default:
break;
}
return ++position;
}
And the main
file:
// main.c
#include "tape.h"
int main(int argc, char **argv) {
FILE *file;
if (argc < 2) {
file = fopen("helloworld.bf", "r");
} else {
file = fopen(argv[1], "r");
}
if (file == NULL) {
fprintf(stderr, "Can not open file %s\n", argv[1]);
return 1;
}
if (fseek(file, 0L, SEEK_END) != 0) {
fprintf(stderr, "Fail to fseek file %s\n", argv[1]);
return 1;
}
long filesize = ftell(file);
if (filesize < 0) {
fprintf(stderr, "Fail to read file's size\n");
return 1;
}
rewind(file);
char source_code[filesize];
size_t result = fread(source_code, 1, filesize, file);
if (fclose(file) != 0) {
fprintf(stderr, "Can not close file %s\n", argv[1]);
return 1;
}
if (result != filesize) {
fprintf(stderr, "Can not read file. Corrupt\n");
}
Tape tape;
initializeTape(&tape);
long i = 0;
while(i < filesize) {
i = interpret(&tape, source_code, filesize, i);
}
freeTape(&tape);
return 0;
}
4 Answers 4
Overall Observations
An interpreter should be able to read from standard in as well as from a file, this would break the entire tape model. The user could also redirect an input file to standard in.
If you are going to program in C then you need to get comfortable with pointers.
In the case of file input I would use an algorithm that reads a line at a time, that way the file doesn't need to be read twice and the memory used to store the file doesn't need to be allocated. Reading a line at a time will also work for console input. If you are use C in an embedded environment allocating the space to store the memory could seriously affect the amount of memory available for processing. For this reason you also need to be careful when using malloc()
, calloc()
, or realloc()
in an embedded environment. Some embedded C compilers do not support memory allocation and some companies will have coding standards that do not include memory allocation for embedded applications.
Only Include Headers Needed to Make the Code Compile
The header file tape.h
includes assert.h
and assert()
is not used in the program. Since the C pre-processor implementation of include is generally to create a temporary source file and actually copy the included header files this increases the size of the temporary source files without need and increases compile time.
Hiding #include
statements within other include files can sometimes lead to problems, include files that are necessary to make the header compile and tape.h
doesn't need any header files to compile. An example of when it would be necessary to include a header file in tape.h
is if there were functions that returned type bool
and then the header file should contain the statement #include <stdbool.h>
.
Make it clear what each C source file needs to compile by including the headers in the C source file.
As a side note, it is better not to use assert
since if the code is optimized all asserts will be optimized out of the code.
Performance (speed)
In the main loop of the program and in the function interpret()
execution time might be improved if you used character pointers rather than integer indexing. In addition to possibly improving the performance this could also decrease the amount of code in the function interpret()
by reducing the number of temporary variables. Note the following code has not been tested and may not work.
In main()
:
char* current_source_code_ptr = source_code;
char* end_file_ptr = &source_code[filesize - 1];
while (current_source_code_ptr < end_file_ptr) {
current_source_code_ptr = interpret(current_source_code_ptr, end_file_ptr, source_code, &tape);
}
char* interpret(char* current_source_code_ptr, const char* end_file_ptr, const char *source_code, Tape* tape) {
switch (*current_source_code_ptr) {
case '>':
incrementPointer(tape);
break;
case '<':
decrementPointer(tape);
break;
case '+':
incrementValue(tape);
break;
case '-':
decrementValue(tape);
break;
case '.':
read(tape);
break;
case ',':
get(tape);
break;
case '[':
if (tape->data[tape->pointer] == (char)0) {
int stack = 1;
for (; current_source_code_ptr < end_file_ptr && stack > 0 && tape->pointer < (end_file_ptr - source_code); current_source_code_ptr++) {
if (*current_source_code_ptr == '[') {
++stack;
}
else if (*current_source_code_ptr == ']') {
--stack;
}
}
if (stack != 0) {
fprintf(stderr, "Syntax error. Missing closing ].\n");
exit(EXIT_FAILURE);
}
else {
current_source_code_ptr++;
}
}
break;
case ']':
if (tape->data[tape->pointer] != (char)0) {
int stack = 1;
for (; current_source_code_ptr >= source_code && stack > 0 && tape->pointer >= 0; current_source_code_ptr--) {
if (*current_source_code_ptr == '[') {
--stack;
}
else if (*current_source_code_ptr == ']') {
++stack;
}
}
if (stack != 0) {
fprintf(stderr, "Syntax error. Missing opening [.\n");
exit(EXIT_FAILURE);
}
else {
current_source_code_ptr++;
}
}
break;
default:
break;
}
return ++current_source_code_ptr;
}
Complexity
The switch/case statement in the function interpret()
is too long, each case should be implemented by a function, so the code for case '[':
and case ']':
should be moved into separate functions.
Use System Defined Constants
The header file stdlib.h
includes system specific definitions for the macros EXIT_SUCCESS and EXIT_FAILURE. This would make the code more readable and possibly more portable.
// main.c
#include <stdio.h>
#include <stdlib.h>
#include "tape.h"
int main(int argc, char** argv) {
FILE* file;
if (argc < 2) {
file = fopen("helloworld.bf", "r");
}
else {
file = fopen(argv[1], "r");
}
if (file == NULL) {
fprintf(stderr, "Can not open file %s\n", argv[1]);
return EXIT_FAILURE;
}
if (fseek(file, 0L, SEEK_END) != 0) {
fprintf(stderr, "Fail to fseek file %s\n", argv[1]);
return EXIT_FAILURE;
}
long filesize = ftell(file);
if (filesize < 0) {
fprintf(stderr, "Fail to read file's size\n");
return EXIT_FAILURE;
}
rewind(file);
char source_code[filesize];
size_t result = fread(source_code, 1, filesize, file);
if (fclose(file) != 0) {
fprintf(stderr, "Can not close file %s\n", argv[1]);
return EXIT_FAILURE;
}
if (result != filesize) {
fprintf(stderr, "Can not read file. Corrupt\n");
}
Tape tape;
initializeTape(&tape);
long i = 0;
while (i < filesize) {
i = interpret(&tape, source_code, filesize, i);
}
freeTape(&tape);
return EXIT_SUCCESS;
}
-
4\$\begingroup\$ "a line at a time, ... doesn't need to be allocated" - does Brainfuck have a strict line length limit, then? \$\endgroup\$Toby Speight– Toby Speight2020年11月09日 09:39:45 +00:00Commented Nov 9, 2020 at 9:39
-
4\$\begingroup\$ I would be careful in assuming pointer arithmetic is more efficient than pointer + integer offset. Often CPUs have native instructions handling pointer + offset from register. And if the compiler can inline everything, it probably won't make any difference at all. In this case, it doesn't matter much, except checking whether the end of the tape is reached in
incrementPointer()
anddecrementPointer()
is IMO slightly nicer to write using offsets than using pointers. \$\endgroup\$G. Sliepen– G. Sliepen2020年11月09日 12:59:56 +00:00Commented Nov 9, 2020 at 12:59
Only include the header files that you need
Looking at tape.h
, it only contains declarations and no definitions. As such, the header files only serve to bloat up the source code and increase compilation time. You should move them to tape.c
.
Use static
methods if possible
If I look at main.c
, the only functions that are utilized are initializeTape
, interpret
and freeTape
. These are the only functions that form the interface. You could move the other functions into tape.c
and declare them static
. Remember, header files should only contain the necessary functions.
Use fixed width integers from <stdint.h>
I'm not a fan of using data types such as long
, unsigned short
, long long
since the standard makes no guarantees about the actual size of these types; only the minimum size. Prefer using fixed types such as int64_t
, uint16_t
, intptr_t
, etc.
initializeTape
and growTape
should not exit
Imagine being a user trying to use your code in one of their projects. If you fail to allocate, the program will exit and will not give the user control on how to deal with the error.
Consider returning a value based on whether memory was successfully allocated, such as 0 or -1, or even true
or false
if you have access to C99. That way, the user can check and decide what to do in case of failure.
if(!initializeTape(&tape))
{
// Do some error handling here
}
You don't have to cast to unsigned short *
after allocating
Not a problem, but I should mention that it is not necessary to cast to the desired type after allocating as void*
is implicitly convertible to other pointer types.
Check tape == NULL
in freeTape
This could lead to potential segfaults if you're not careful.
-
7\$\begingroup\$ tape.h only contains declarations and no definitions. - Yeah - that's the point. It should stay like this; that's the standard thing to do, and mashing those into the
.c
file will not buy you much compilation efficiency, assuming that we're not living in the 1970s. \$\endgroup\$Reinderien– Reinderien2020年11月08日 14:34:57 +00:00Commented Nov 8, 2020 at 14:34 -
3\$\begingroup\$ The standard does make some guarantees about the range of values that can be represented by the built-in types. Fixed-width types such as
int64_t
are usually a poor choice, especially compared toint_fast64_t
, for example. \$\endgroup\$Toby Speight– Toby Speight2020年11月09日 09:42:47 +00:00Commented Nov 9, 2020 at 9:42
You could use perror()
to give more useful information from library-call failures. For example, consider
if (file == NULL) { fprintf(stderr, "Can not open file %s\n", argv[1]); return EXIT_FAILURE; }
We could get better error message (e.g. "file not found", "permission denied", etc) like this:
if (!file) {
perror(argv[1]);
return EXIT_FAILURE;
}
-
2\$\begingroup\$ But
perror()
unfortunately doesn't show what the filename was. You can writefprintf(stderr, "Could not open %s: %s\n", argv[1], strerror(errno))
, or if you're on Linux or BSD there's the very convenienterr()
, sadly not standard C. \$\endgroup\$G. Sliepen– G. Sliepen2020年11月09日 12:53:11 +00:00Commented Nov 9, 2020 at 12:53 -
4\$\begingroup\$ Yes @G.Sliepen, though in my example, it does show the filename (at the expense of not showing which operation failed). I do usually end up using
fprintf()
withstrerror()
as you show (or, in GNU environmentsfprintf()
with%m
). \$\endgroup\$Toby Speight– Toby Speight2020年11月09日 13:16:30 +00:00Commented Nov 9, 2020 at 13:16
In general your code looks reasonable, there is only one thing that you need to keep for your self for future devs. In general, your function initializeTape and the rest of the file tape.c
void initializeTape(Tape *tape) {
tape->pointer = 0;
tape->capacity = 8;
tape->data = (unsigned short *) calloc( tape->capacity, sizeof(unsigned short));
if (tape->data == NULL) {
fprintf(stderr, "Out of memory error.\n");
exit(1);
}
}
should check that the pointer tape is not null
void initializeTape(Tape *tape) {
if (tape) {
tape->pointer = 0;
tape->capacity = 8;
tape->data = (unsigned short *) calloc( tape->capacity, sizeof(unsigned short));
if (tape->data == NULL) {
fprintf(stderr, "Out of memory error.\n");
exit(1);
}
} // else exit(-1) or whatever you choose
}
This will remove potentially issues (invalid pointer) if you extend your code.
Hope it helps
-
2\$\begingroup\$ It's normal for functions that take a pointer arg to require it to be non-NULL without checking for it, e.g.
fprintf
itself will just crash if you pass a NULL pointer for either of its first 2 args. There's nothing useful you can do in this function other than exit as a debug check. But dereferencing a NULL pointer will fairly reliably crash on most implementations, so just do that, and let people use a debugger to find where they're passing a NULL pointer from, if it ever happens. \$\endgroup\$Peter Cordes– Peter Cordes2020年11月09日 23:58:25 +00:00Commented Nov 9, 2020 at 23:58
void *newptr = realloc(...); if ( newptr ) data = newptr; else handle_nomemory();
\$\endgroup\$