Yesterday I got a sudden urge to go from writing Python to something, well, more UNIX core-ish. I read some examples on here and decided I might as well put some of that stuff to use to test something else I'm working on: computability theory. Ergo: I wanted to write a basic, one tape, Turing machine simulator.
It works, as far as I know. It's not brilliant, and the Turing machine is hard coded in this early version, but it is functional.
I'd really like some review on the code. One thing I'm particularly curious about is whether I'm doing appropriate error checking and handling. e.g. are there specific cases where assert
is more appropriate or where I should do more error checking?
turing.h
#ifndef __turing_h__
#define __turing_h__
#define MAX_TRANSITIONS 5
#define MAX_STATES 25
// forward declare structs
struct State;
struct Transition;
typedef enum {
LEFT, RIGHT
} Direction;
typedef enum {
FALSE, TRUE
} Bool;
struct Transition {
char input;
char write;
Direction move;
struct State *next;
};
typedef struct Transition Transition;
struct State {
int id;
int trans_count;
struct Transition* transitions[ MAX_TRANSITIONS ];
Bool accept;
Bool reject;
};
typedef struct State State;
struct Turing {
int state_count;
State* states[ MAX_STATES ];
State* current;
int head;
};
typedef struct Turing Turing;
#endif
turing.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "turing.h"
// disable debug mode
#define NDEBUG
#include "debug.h"
// used to hand out ids
int state_id = 0;
void die( char *message )
{
if( message )
{
printf( "Error: %s.\n", message );
}
// exit unsuccesfully
exit(1);
}
Transition* Transition_create( char input, char write, Direction move, State *next )
{
// allocate memory
Transition *trans = malloc( sizeof( Transition ));
if( ! trans ) die( "Memory error" );
trans->input = input;
trans->write = write;
trans->move = move;
trans->next = next;
return trans;
}
void Transition_destroy( Transition* trans )
{
free( trans );
}
State* State_create( Bool accept, Bool reject )
{
// allocate mem
State *state = malloc( sizeof( State ));
if( ! state ) die( "Memory error" );
state->id = state_id++;
state->accept = accept;
state->reject = reject;
state->trans_count = 0;
return state;
}
void State_add_transition( State *state, Transition *trans )
{
// check if we can still add another transition
if( state->trans_count == MAX_TRANSITIONS ) {
char buffer[ 50 ];
sprintf( buffer, "State %d already has the maximum amount of transitions.", state->id );
die( buffer );
}
// add the transition
state->transitions[ state->trans_count ] = trans;
state->trans_count++;
}
void State_destroy( State *state )
{
int i = 0;
// loop over its transitions
for( i = 0; i < state->trans_count; i++ ) {
Transition *trans = state->transitions[ i ];
if( !trans ) die( "Could not fetch transition." );
Transition_destroy( trans );
}
free( state );
}
Turing* Turing_create()
{
// allocate mem
Turing *machine = malloc( sizeof( Turing ));
machine->state_count = 0;
machine->current = NULL;
machine->head = 0;
return machine;
}
void Turing_destroy( Turing *machine )
{
int i = 0;
// loop over it's states
for( i = 0; i < machine->state_count; i++ ) {
State *state = machine->states[ i ];
if( !state ) die( "Could not fetch turing state" );
State_destroy( state );
}
free( machine );
}
void Turing_add_state( Turing *machine, State *state )
{
if( machine->state_count == MAX_STATES ) {
die( "The turing machine already has the maximum amount of states" );
}
// add the state
machine->states[ machine->state_count++ ] = state;
}
State* Turing_step( Turing *machine, char* tape, int tape_len )
{
int i = 0;
char input = tape[ machine->head ];
State* state = machine->current;
// look for a transition on the given input
for( i = 0; i < state->trans_count; i++ ) {
Transition* trans = state->transitions[ i ];
if( !trans ) die( "Transition retrieval error" );
// check if this is a transition in the given char input
if( trans->input == input ) {
debug( "Found transition for input %c", input );
State *next = trans->next;
if( !next ) die( "Transitions to NULL state" );
// write if nescesary
if( trans->write != '0円' ) {
debug( "Writing %c", trans->write );
tape[ machine->head ] = trans->write;
debug( "Writing done" );
}
// move the head
if( trans->move == LEFT ) {
if( machine->head > 0 ) {
machine->head--;
debug( "Moved head left" );
}
} else {
if( machine->head + 1 >= tape_len ) {
die( "Machine walked of tape on right side" );
}
machine->head++;
debug( "Moved head right" );
}
// move the machine to the next state
debug( "Setting current state" );
machine->current = next;
return next;
}
}
char buffer[ 50 ];
sprintf( buffer, "Turing machine blocked: state %d for input %c", state->id, input );
die( buffer );
}
void Turing_run( Turing *machine, char *tape, int tapelen )
{
// check if the start state is configured properly
if( !machine->current ) die( "Turing machine has now start state" );
while( TRUE ) {
State* state = Turing_step( machine, tape, tapelen );
if( state->accept ) {
printf( "Input accepted in state: %d\n", state->id );
break;
} else if( state->reject ) {
printf( "Input rejected in state: %d\n", state->id );
break;
} else {
printf( "Moved to state: %d\n", state->id );
}
}
}
int main( int argc, char* argv[] )
{
Turing* machine = Turing_create();
State* q1 = State_create( FALSE, FALSE );
State* q2 = State_create( FALSE, FALSE );
State* q3 = State_create( FALSE, FALSE );
State* q4 = State_create( FALSE, FALSE );
State* q5 = State_create( FALSE, FALSE );
State* qaccept = State_create( TRUE, FALSE );
State* qreject = State_create( FALSE, TRUE );
Transition* q1_r_space = Transition_create( ' ', '0円', RIGHT, qreject );
Transition* q1_r_x = Transition_create( 'x', '0円', RIGHT, qreject );
Transition* q1_q2_zero = Transition_create( '0', ' ', RIGHT, q2 );
Transition* q2_q2_x = Transition_create( 'x', '0円', RIGHT, q2 );
Transition* q2_a_space = Transition_create( ' ', '0円', RIGHT, qaccept );
Transition* q2_q3_zero = Transition_create( '0', 'x', RIGHT, q3 );
Transition* q3_q3_x = Transition_create( 'x', '0円', RIGHT, q3 );
Transition* q3_q4_zero = Transition_create( '0', '0円', RIGHT, q4 );
Transition* q3_q5_space = Transition_create( ' ', '0円', LEFT, q5 );
Transition* q4_q3_zero = Transition_create( '0', 'x', RIGHT, q3 );
Transition* q4_q4_x = Transition_create( 'x', '0円', RIGHT, q4 );
Transition* q4_r_space = Transition_create( ' ', '0円', RIGHT, qreject );
Transition* q5_q5_zero = Transition_create( '0', '0円', LEFT, q5 );
Transition* q5_q5_x = Transition_create( 'x', '0円', LEFT, q5 );
Transition* q5_q2_space = Transition_create( ' ', '0円', RIGHT, q2 );
State_add_transition( q1, q1_r_space );
State_add_transition( q1, q1_r_x );
State_add_transition( q1, q1_q2_zero );
State_add_transition( q2, q2_q2_x );
State_add_transition( q2, q2_a_space );
State_add_transition( q2, q2_q3_zero );
State_add_transition( q3, q3_q3_x );
State_add_transition( q3, q3_q4_zero );
State_add_transition( q3, q3_q5_space );
State_add_transition( q4, q4_q3_zero );
State_add_transition( q4, q4_q4_x );
State_add_transition( q4, q4_r_space );
State_add_transition( q5, q5_q5_zero );
State_add_transition( q5, q5_q5_x );
State_add_transition( q5, q5_q2_space );
Turing_add_state( machine, q1 );
Turing_add_state( machine, q2 );
Turing_add_state( machine, q3 );
Turing_add_state( machine, q4 );
Turing_add_state( machine, q5 );
Turing_add_state( machine, qaccept );
Turing_add_state( machine, qreject );
machine->current = q1;
char* input = "0000000000000000 ";
int len = strlen( input );
char* tape = malloc( len * sizeof( char ));
strcpy( tape, input );
Turing_run( machine, tape, len );
// clean up
Turing_destroy( machine );
free( tape );
}
debug.h
#ifndef __debug_h__
#define __debug_h__
#ifndef NDEBUG
#define debug(M, ... ) fprintf( stderr, "DEBUG: %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__ )
#else
#define debug(M, ... )
#endif
#endif
For now the simulator is configured to accept on input strings from the language:
L = { 0^2^n | n > 0 }
Or: all strings of 0s whose length is a power of 2.
If you happen to have Sipser Introduction to the Theory of Computation. It's on page 145 of the 2nd edition.
GIT repo up and documentation on the way (we have a tiny wiki with basic usage)
-
\$\begingroup\$ You might find this page interesting: codegolf.stackexchange.com/questions/8787/… \$\endgroup\$luser droog– luser droog2012年12月21日 07:57:56 +00:00Commented Dec 21, 2012 at 7:57
-
\$\begingroup\$ If you would like a review on the revised code, please ask another question. \$\endgroup\$200_success– 200_success2014年06月29日 22:08:47 +00:00Commented Jun 29, 2014 at 22:08
4 Answers 4
AJ, I like it. Very nicely coded.
Here are some comments. I've omitted things already mentioned by @LokiAstari
forward declaration of
struct Transition
is unnecessary.die
, prefer to print errors tostderr
exit(1)
preferexit(EXIT_FAILURE)
some noisey comments ("exit unsucessfully", "allocate memory" etc)
State_create
does not initialisetransitions
(probably benign)sprintf
is often used badly, resulting in buffer overflows.snprintf
is preferred. But as you use it only before dying, no risk.Turing_create
does not checkmalloc
(other uses do).do you prefer
type* var
ortype *var
? I prefer the latter, but whichever you use, consistency is best.don't you get a warning (control may reach end of non-void function, or similar) from
Turing_step
?in some places you can define loop variables in the loop:
for( int i = 0; ...
make local functions static. This is good practice but for a small project like this makes no difference.
your list of Transition_create calls in
main
would be more readable if aligned.your
main
has some issues in the copying ofinput
.strlen
returnssize_t
andmalloc
takessize_t
. So best to usesize_t
(-Wsign-conversion)- sizeof(char) is 1 by definition
- you malloced one too few bytes for the string copy.
- you could have used
strdup
if you have it. - but note that mallocing a copy of
input
is not necessary. You can just declare the string aschar input[] = "..."
instead ofchar* input = "..."
and it will be writable. - if you use
char input[]
you need not dostrlen(input)
- justsizeof input -1
passing the length of the input string around seems unnecessary (users of the string can check for the terminating '0円')
On error checking and use of assert
, I think you have done a good job of
checking. I'm not a big user of asserts (so maybe my thoughts on them are not
of much use to you). I'm always uneasy using an assert - they are funny
things. They say, in effect, that the tested condition is so important that it
is worth exiting if it occurs during testing, but that if it occurs during
normal use (NDEBUG) it can be ignored. That is illogical unless you can be
sure that all possible assert failures can be detected during testing. To me
that implies that asserts should be used only to test for errors that are
intrinsic to the program and have no dependence upon the data or the runtime.
For example checking whether malloc
failed would use an if
condition
whereas checking for something that can only happen if an algorithm is wrongly
coded is a fair use of assert
. In your case, I might be tempted to use
assert
where you check array entries are not NULL:
Transition *trans = state->transitions[ i ];
assert( trans );
But I've seen asserts sprinkled around like confettii, so opinions clearly differ. Maybe other reviewers will be able to help.
-
\$\begingroup\$ Thank you. Some of these I already incorporated. Some of them I'll research a bit. I agree with you on the assert/if dilemma. I see alot of code (in other languages) that seem to forget that asserts are usually not for runtime checking. \$\endgroup\$A.J.Rouvoet– A.J.Rouvoet2012年12月21日 10:26:22 +00:00Commented Dec 21, 2012 at 10:26
-
\$\begingroup\$ About passing around the string length: I'm getting the impressions there are several opinions on the matter. You either have to make SURE all strings are 0円 terminated and rely on C string tools and/or build in extra checks to prevent overflows. You clearly vote for the former. Anyone else? \$\endgroup\$A.J.Rouvoet– A.J.Rouvoet2012年12月21日 10:28:44 +00:00Commented Dec 21, 2012 at 10:28
-
\$\begingroup\$ AJ, strings should always be terminated with 0円 but arrays in general are not. But strings are arrays - maybe that is where the confusion arrises... Note that I corrected a piece of badly wrong advice above (using sizeof on a pointer!) \$\endgroup\$William Morris– William Morris2012年12月21日 12:43:22 +00:00Commented Dec 21, 2012 at 12:43
-
\$\begingroup\$ No, there is no confusion. I'm just deliberating whether I should TRUST the strings in my program to be properly terminated. Should I? \$\endgroup\$A.J.Rouvoet– A.J.Rouvoet2012年12月21日 13:12:16 +00:00Commented Dec 21, 2012 at 13:12
-
\$\begingroup\$ Yes, if it is created by the compiler (from your quoted string, as in "01234..") it will definitely be terminated. If created by someone using strcpy (as in your original code) then it depends upon the code. But if the 0円 is missing, using strlen to find out how long it is will also fail, as strlen won't find the 0円 it needs. So in practice you have to assume it is correctly terminated. \$\endgroup\$William Morris– William Morris2012年12月21日 13:26:14 +00:00Commented Dec 21, 2012 at 13:26
Identifiers with two underscores are reserved for the implementation (so don't use them). Also MACROS (things defined by #define) are traditionally all uppercase (because they do not obey scope rules (because they are not part of the C language but part of the pre-processor system) we make them all uppercase to make sure that we don't accidentally clash with other identifiers).
#ifndef __turing_h__
#define __turing_h__
If you are going to forward declare these then also do the appropriate typedefs.
struct State;
struct Transition;
// Add
typedef struct State State;
typedef struct Transition Transition;
Now you will be able to refer to then without using the prefix struct.
Don't use all caps. A macro (with no scope boundary may play havake with these identifiers).
typedef enum {
LEFT, RIGHT
} Direction;
The system header files aready define FALSE/TRUE. Also in this situation you define TRUE as 1. This is not correct. The only thing you can say about TRUE
is (!FALSE)
. Which is not necessarily the same as 1.
typedef enum {
FALSE, TRUE
} Bool;
Nothing wrong with this. But if you lay out your type with the chars first then you may not get the best packing arrangement. Always order them from largest to smallest (if you care about layout (usually it is not a big thing but if you have large space requirements and can become an issue)).
struct Transition {
char input;
char write;
Direction move;
State *next;
};
So I would have layed it out like this:
typedef struct Transition {
State *next; // At least as big as an int (but can be bigger).
Direction move; // Enum is an int.
char input; // Now chars.
char write;
} Transition;
-
\$\begingroup\$ "Don't use all caps." Actually, all caps is a very common way to declare constants, enums and all manner of global identifiers, not just macros. \$\endgroup\$Lundin– Lundin2012年12月21日 07:25:01 +00:00Commented Dec 21, 2012 at 7:25
-
\$\begingroup\$ "...you define TRUE as 1. This is not correct." Yes it is perfectly correct. Always make the code compatible with the C standard bool type, which is explicitly guaranteed to be 1 and not some magical "not 0 maybe 1". The best is of course to use the standard bool type from stdbool.h. \$\endgroup\$Lundin– Lundin2012年12月21日 07:27:46 +00:00Commented Dec 21, 2012 at 7:27
-
\$\begingroup\$ Ah
stdbool.h
. Another useful note! \$\endgroup\$A.J.Rouvoet– A.J.Rouvoet2012年12月21日 10:29:29 +00:00Commented Dec 21, 2012 at 10:29
I just have one small note in addition to the other answers.
Your #include
s should be organized differently:
#include <stdio.h> #include <stdlib.h> #include <assert.h> #include <string.h> #include "turing.h"
It's best to include your own headers first, in order to avoid any possible dependency issues with the system libraries. In other words, you shouldn't force your headers to be exposed to other libraries, especially if that is not intended to happen.
In addition, you can sort your headers in a certain order, such as alphabetically. This will make it easier to keep track of them.
#include "turing.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
as the malloc() return void *
you must correct all the memory allocation statements
Turing *machine = malloc( sizeof( Turing ));
---> Turing *machine = (Turing*) malloc( sizeof( Turing ));
and so on
-
2\$\begingroup\$ No, don't do that. stackoverflow.com/questions/605845/… \$\endgroup\$Mat– Mat2017年01月21日 10:36:21 +00:00Commented Jan 21, 2017 at 10:36
Explore related questions
See similar questions with these tags.