The goal is to create a preprocessor for the C language, as small as possible in terms of source code size in bytes, in your preferred language. Its input will be a C source file, and its output will be the pre-processed source code.
The items that it will need to be able to process shall be: Comment removal (line/block), #include directives (by opening files at relative paths and replacing text at the point needed), #define, #undef, #if, #elif, #else, #endif, #ifdef, #ifndef, and defined(). Other C preprocessor directives like #pragmas or #errors may be ignored.
There is no need to evaluate arithmetic expressions or comparison operators in #if directives; we assume the expression evaluates to true as long as it contains an integer other than zero (its main use will be with the defined() operator). Examples of possible input and output follow (possible extra whitespace in the output files was trimmed for better appearance; there is no need for your code to do so). A program able to process the following examples properly will be considered sufficient.
----Input file: foo.c (main file being preprocessed)
#include "bar.h" // Line may or may not exist
#ifdef NEEDS_BAZZER
#include "baz.h"
#endif // NEEDS_BAZZER
#ifdef _BAZ_H_
int main(int argc, char ** argv)
{
/* Main function.
In case that bar.h defined NEEDS_BAZ as true,
we call baz.h's macro BAZZER with the length of the
program's argument list. */
return BAZZER(argc);
}
#elif defined(_BAR_H_)
// In case that bar.h was included but didn't define NEEDS_BAZ.
#undef _BAR_H_
#define NEEDS_BARRER
#include "bar.h"
int main(int argc, char ** argv)
{
return BARRER(argc);
}
#else
// In case that bar.h wasn't included at all.
int main()
{return 0;}
#endif // _BAZ_H_
----Input file bar.h (Included header)
#ifndef _BAR_H_
#define _BAR_H_
#ifdef NEEDS_BARRER
int bar(int * i)
{
*i += 4 + *i;
return *i;
}
#define BARRER(i) (bar(&i), i*=2, bar(&i))
#else
#define NEEDS_BAZZER // Line may or may not exist
#endif // NEEDS_BARRER
#endif // _BAR_H_
----Input file baz.h (Included header)
#ifndef _BAZ_H_
#define _BAZ_H_
int baz(int * i)
{
*i = 4 * (*i + 2);
return *i;
}
#define BAZZER(i) (baz(&i), i+=2, baz(&i))
#endif // _BAZ_H_
----Output file foopp.c (no edits)
int baz(int * i)
{
*i = 4 * (*i + 2);
return *i;
}
int main(int argc, char ** argv)
{
return (baz(&argc), argc+=2, baz(&argc));
}
----Output file foopp2.c (with foo.c's first line removed)
int main()
{return 0;}
----Output file foopp3.c (with bar.h's line "#define NEEDS_BAZZER" removed)
int bar(int * i)
{
*i += 4 + *i;
return *i;
}
int main(int argc, char ** argv)
{
return (bar(&argc), argc*=2, bar(&argc));
}
1 Answer 1
Flex, 1170+4=1174
1170 characters in the flex code + 4 characters for a compilation flag. To produce an executable, run `flex pre.l ; gcc lex.yy.c -lfl`.
~~The entry leaks memory like a sieve and doesn't close included files.~~ But otherwise, it should be completely functional as per the spec.
%{
/* Prologue of the golfed Flex preprocessor: short aliases, global state,
 * and three helpers shared by the rules. */

/* M: alias for malloc. */
#define M malloc
/* X: alias for yytext, the text of the current match. */
#define X yytext
/* A: assigns yytext to the global a and yields that value, so f(A+7)
 * first sets a to yytext+7; f() then reads a as its implicit argument. */
#define A a=X
/* B(x): switches the scanner to start condition x. */
#define B(x) BEGIN x;
/* Y: alias for YY_CURRENT_BUFFER. */
#define Y YY_CURRENT_BUFFER
/* Globals, declared without type specifiers (implicit int):
 *   a, b  - scratch pointers for the macro name and value being handled;
 *           a is also the lookup key read by f()
 *   v, V  - parallel arrays of macro names (v) and macro values (V)
 *   t, T  - temporaries used by g() while growing v and V
 *   i     - shared loop index; f() leaves it at the index it matched
 *   s     - one more than the number of stored macros; slots run 1..s-1
 *   o     - nesting depth counter used by the rules of start condition F */
*a,*b,**v,**V,**t,**T,i,s=1,o;
/* g(): grows both macro arrays by one slot. Increments s, allocates new
 * t and T, copies entries 1 through s-2 from v and V, frees the old
 * arrays and installs the new ones. Slot s-1 is left for the caller to
 * fill; slot 0 is never written here.
 * NOTE(review): malloc receives s itself rather than s times the element
 * size, which looks like an under-allocation -- confirm. */
g(){t=M(++s);T=M(s);for(i=1;i<s-1;i++)t[i]=v[i],T[i]=V[i];free(v);free(V);v=t;V=T;}
/* f(): searches v[1..s-1] with strcmp for the string in the global a.
 * Returns the matching index, or 0 when nothing matches. On a match the
 * global i still holds that index, which callers use as V[i]. */
f(){for(i=1;i<s;i++)if(!strcmp(v[i],a))return i;return 0;}
/* d(y): truncates the current match by storing a NUL at
 * yytext[yyleng-y], i.e. drops its last y characters. */
d(y){X[yyleng-y]=0;}
%}
/* Scanner definitions and rules.
 *
 * Exclusive start conditions:
 *   D - reading the value part of a #define line
 *   F - skipping the body of a conditional that evaluated false
 *   I - skipping from #else or #elif to #endif after a branch was taken
 * N matches the rest of the current line including its newline.
 *
 * Rules in the initial state, in order:
 *   - line comments and block comments are matched with an empty action,
 *     so they produce no output; string literals are echoed unchanged
 *   - #include: d(1) drops the closing quote, the name at yytext+10 is
 *     opened with fopen and pushed as a new input buffer; the fopen
 *     result is not checked
 *   - #define NAME: copies the name (yytext+8) into a fresh buffer a and
 *     enters D; the D rule copies the rest of the line, minus newline and
 *     one leading space, into b, then stores or replaces the entry and
 *     returns to the initial state
 *     NOTE(review): the comma operator binds looser than the conditional
 *     operator, so the two trailing assignments to slot s-1 appear to run
 *     on the found branch as well -- confirm
 *   - #undef NAME: overwrites the first character of the stored name with
 *     NUL so later lookups no longer match
 *     NOTE(review): when the name is unknown f() returns 0 and v[0] is
 *     written -- confirm this is safe
 *   - #if defined(NAME) and #ifdef NAME: h() enters F when NAME is not
 *     found
 *   - #if EXPR: enters F when atoi of the text at yytext+4 is zero
 *   - #ifndef NAME: enters F when NAME is found
 *   - #endif is discarded; #else and #elif enter I
 *   - identifiers: prints the stored value when f() finds the name,
 *     otherwise the identifier itself
 *     NOTE(review): the text is passed to printf as the format string
 *
 * Rules in F: any line starting with #if increments o; #endif either
 * decrements o or, at depth zero, returns to the initial state; at depth
 * zero #else returns to the initial state and #elif does so when its
 * condition holds; every other line is discarded.
 *
 * Rules in I: #endif returns to the initial state; every other line is
 * discarded.
 *
 * End of file: remembers the current buffer in a, pops it, exits when no
 * buffer remains, then calls fclose on a.
 * NOTE(review): a holds the value of YY_CURRENT_BUFFER, a buffer handle
 * rather than the FILE pointer that was opened -- confirm.
 */
%x D F I
N .*\n
%%
"//".*
"/*"([^\*]|\*[^\/])*"*/"
\"(\\.|[^\\"])*\" ECHO;
^"#include "\"[^\"]*\" d(1),yypush_buffer_state(yy_create_buffer(fopen(X+10,"r"),YY_BUF_SIZE));
^"#define "[^ ]* {B(D)strcpy(a=M(yyleng),X+8);}
<D>" "?{N} {b=M(yyleng);d(1);f(strcpy(b,X+(X[0]==32)))?free(V[i]),V[i]=b:g(),v[s-1]=a,V[s-1]=b;B(0)}
^"#undef "{N} d(1),v[f(A+7)][0]=0;
^"#if defined(".*")\n" h(2,12);
^"#ifdef "{N} h(1,7);
^"#if "{N} {d(1);if(!atoi(X+4))B(F)}
^"#ifndef "{N} {d(1);if(f(A+8))B(F)}
<F>^"#if"{N} o++;
<F>^"#endif"{N} if(!o--)B(++o)
<F>^"#else"{N} if(!o)B(0)
<F>^"#elif defined(".*")\n" if(!o){d(2);if(f(A+14))B(0)}
<F>^"#elif "{N} if(!o){d(1);if(atoi(X+6))B(0)}
<F>{N}
^"#endif"{N}
^"#el"("se"|"if"){N} B(I)
<I>^"#endif"{N} B(0)
<I>{N}
[a-zA-Z_][a-zA-Z_0-9]* printf(f(A)?V[i]:a);
<<EOF>> {a=Y;yypop_buffer_state();if(!Y)exit(0);fclose(a);}
%%
/* h(x,y): shared action for the #ifdef and #if defined(...) rules.
 * Drops the last x characters of the current match with d(x), points the
 * global a at yytext+y (the macro name), and enters start condition F
 * when f() does not find that name. */
h(x,y){d(x);if(!f(A+y))B(F)}
Some explanation:
- `a` and `b` are temps to hold strings from the input. `a` is also used as the parameter to function `f`.
- `v` holds the names of macros and `V` holds the 'V'alues of macros
- `t` and `T` are 't'emporary holders for when we grow `v` and `V`
- `i` is an 'i'ncrementer for loops
- `s` is the 's'ize of the macro array
- `o` is the count of the 'o'pen `if`s inside a false conditional
- `g()` 'g'rows the macro arrays
- `f()` 'f'inds a macro with the same value in `v` as `a`
- `d(y)` 'd'rops the last `y` characters from the current input
- state `D` is for inside a 'D'efine
- state `F` is for ignoring a 'F'alse conditional
- state `I` is for 'I'gnoring `else`/`elif` after a true conditional was found.
EDIT1: cleaned up many of the memory leaks and implemented file closing
EDIT2: modified code to handle nested macros more correctly
EDIT3: crazy amount of golfing
EDIT4: more golfing
EDIT5: more golfing; I've also noticed that my call to fclose() causes issues on some computers...looking into this.
-
It works very well so far in most cases... for some reason it throws a segmentation fault when I `#include` stuff, but I guess this is related to the bug in edit #5. Also it doesn't substitute macros, even though it successfully processes the #if blocks - unless I'm doing something wrong... but in general it looks very good, and it gives a rough idea of what a lexer can do, since I can understand it even in its golfed form. Try to see if the bugs can be fixed; if not, it's ok, as the code explains itself well. This will probably be the chosen answer, as there are no other entries. – Thanasis Papoutsidakis, commented Mar 4, 2014 at 10:06
#if
needs to be supported? i.e. does the preprocessor need to support expressions with arithmetic, bitwise operations, etc? \$\endgroup\$