After hours of working, I've finally finished my first C log parsing program! (previously was a bash script, now it is C).
Although I think I've gotten most everything, I was just wondering if you saw any possible buffer/memory overflow dangers in the coding?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#ifndef max
#define max(a, b) ((a)>(b))? (a) : (b)
#endif
long GetFileSize(FILE *fp){
long fsize = 0;
fseek(fp,0,SEEK_END);
fsize = ftell(fp);
fseek(fp,0,SEEK_SET);//reset stream position!!
return fsize;
}
char *lastline(char *filepath){
FILE *fp;
char buff[4096+1];
int size,i;
long fsize;
if(NULL==(fp=fopen(filepath, "r"))){
printf("You have not died recently enough for any information to be recorded.");
return NULL;
}
fsize= -1L*GetFileSize(fp);
if(size=fseek(fp, max(fsize, -4096L), SEEK_END)){
perror("cannot seek");
exit(0);
}
size=fread(buff, sizeof(char), 4096, fp);
fclose(fp);
buff[size] = '0円';
i=size-1;
if(buff[i]=='\n'){
buff[i] = '0円';
}
while(i >=0 && buff[i] != '\n')
--i;
++i;
return strdup(&buff[i]);
}
int main(int argc, char *argv[], char *envp[]){
typedef char * string;
char *last;
char *name;
char field_x[128];
char field_y[128];
char field_z[128];
char field_world[128];
char field_cause[128];
char field_killer[128];
name = getenv("MCEXEC_PLAYERNAME");
char *filename;
char *p;
char *ispvp;
int i;
char *f;
char output[200] = {0x00};
int index = 0;
char field_year[128];
char field_month[128];
char field_mix[128];
char field_mix2[128];
char field_day[128];
char field_hour[128];
char field_minute[128];
char field_seconds[128];
char *hummonth;
int dmon;
int dy;
char *cause_string;
int x;
char *deathtype;
string dtypes[15] = { "unknown", "water", "fire", "explosion", "lava", "fall", "cactus", "creeper", "skeleton", "spider", "zombie", "pigzombie", "slime", "ghast", "suicide" };
string dstrings[15] = { "You died for unknown reasons", "You Drowned", "You were burnt to death", "You were blown to pieces", "You tried to swim in lava", "You fell to your death", "You tried to hug a cactus", "You were killed by a (more to this but not needed for this post)
string realm[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
filename = malloc(sizeof "/home/minecraft/freedonia/playerdata/deathlog-.txt" - 1 + strlen(name) + 1);
if (!filename) exit(EXIT_FAILURE);
snprintf(filename,4096,"/home/minecraft/freedonia/playerdata/deathlog-%s.txt",name);
last = lastline(filename);
if( last != NULL ) {
printf( "%s\n", last );
sscanf(last, "%*[^:]:%*[^:]:%*[^:]:%127[^:]:", field_x);
sscanf(last, "%*[^:]:%*[^:]:%*[^:]:%*[^:]:%127[^:]:", field_y);
sscanf(last, "%*[^:]:%*[^:]:%*[^:]:%*[^:]:%*[^:]:%127[^:]:", field_z);
sscanf(last, "%*[^:]:%*[^:]:%*[^:]:%*[^:]:%*[^:]:%*[^:]:%127[^:]:", field_world);
sscanf(last, "%*[^:]:%*[^:]:%*[^:]:%*[^:]:%*[^:]:%*[^:]:%*[^:]:%127[^:]:", field_cause);
sscanf(last, "%127[^ ] ", field_mix);
sscanf(field_mix, "%127[^-]-", field_year);
sscanf(field_mix, "%*[^-]-%127[^-]-", field_month);
sscanf(field_mix, "%*[^-]-%*[^-]-%127[^-]-", field_day);
sscanf(last, "%127[^:]:", field_mix2);
sscanf(field_mix2, "%*[^ ] %127[^ ] ", field_hour);
sscanf(last, "%*[^:]:%127[^:]:", field_minute);
sscanf(last, "%*[^:]:%*[^:]:%127[^:]:", field_seconds);
dmon = atoi(field_month);
hummonth = realm[dmon-1];
p = strchr(field_cause, '_');
printf( "X coord: %s\n", field_x);
printf( "Y coord: %s\n", field_y);
printf( "Z coord: %s\n", field_z);
printf( "World: %s\n", field_world);
printf( "Cause: %s\n", field_cause);
printf( "Year: %s\n", field_year );
printf( "Month: %s\n", field_month );
printf( "Day: %s\n", field_day );
printf( "Hour: %s\n", field_hour );
printf( "Minute: %s\n", field_minute );
printf( "Second: %s\n", field_seconds );
printf( "Human Month: %s\n", hummonth );
while (p != NULL) {
ispvp = "true";
sscanf(field_cause, "%*[^_]_%128[^_]_", field_killer);
printf( "Killer: %s\n", field_killer);
cause_string = malloc(sizeof "You were killed by §f\n" - 1 + strlen(field_killer) + 1);
snprintf(cause_string,128,"You were killed by §f%s\n",field_killer);
printf( "%s", cause_string);
p = NULL;
}
f = field_cause;
while( *f )
{
if (isalnum(*f) || *f == '_')
{
output[index++] = *f;
}
f++;
}
if (strcmp(ispvp,"true")!=0) {
for ( x = 0; x < 15; x++ ) {
deathtype = dtypes[x];
if (strcmp(field_cause,deathtype)==0) {
cause_string = dstrings[x];
printf( "%s\n", cause_string );
{break;}
}
}
}
dy = atoi(field_y);
if ( 0 > dy ) {
printf( "§eYou suffocated in the void §6at§f%s:%s §6on§f %s %s", field_hour, field_minute, hummonth, field_day );
printf("§6Coords: §f%s %s\n", field_x, field_z );
}
else if (strcmp(field_world,"normal")==0) {
printf("§e%s §6at§f%s:%s §6on§f %s %s\n", cause_string, field_hour, field_minute, hummonth, field_day );
printf("§6Coords: §f%s %s %s\n", field_x, field_y, field_z );
}
else if (strcmp(field_world,"normal")==0) {
printf("§e%s §6at§f%s:%s §6on§f %s %s\n", cause_string, field_hour, field_minute, hummonth, field_day );
printf("§6Coords: §f%s %s %s §6in the §fnether\n", field_x, field_y, field_z );
}
}
// printf("\"%s\"\n", last);
free(last);
return 0;
}
3 Answers 3
There are several things to improve in the code before looking for buffer overflows:
- Don't duplicate constant strings. E.g.
sizeof("You were killed by §f\n")
and thensnprintf(..., "You were killed by §f\n", ...)
) - Avoid raw constants (and duplicated) in the code. E.g.
char buff[4096+1]; ... fread(buff, sizeof(char), 4096, fp);
In this case you can define a constant/macro for4096
or just usesizeof(buff)
infread(buff, sizeof(char), sizeof(buff)-1, fp);
. In the code:
while(i >=0 && buff[i] != '\n') --i; ++i;
The
++i;
has a invalid indentation which can confuse the reader.Your macro
max
should have some parenthesis to enclose the whole result:#define max(a, b) ( ((a)>(b))? (a) : (b) )
Without those parenthesis a call like
(max(2,1)+3)
will give you 2 (instead of 5).- You've defined
string
type liketypedef char * string
but you're using thestring
type likeconst char*
. - You're calling several times
snprintf
function with an invalid 2nd argument (which is the same as callingsprintf
directly). If you are creating a buffer with a specific size inmalloc
, remember to use that size insnprintf()
. In
dmon = atoi(field_month); hummonth = realm[dmon-1];
The
atoi
is generally deprecated, because it's not required to seterrno
-- when it returns a zero, you won't know why. You should usestrtol
and ensure thatdmon
has sensible value. Ifdmon
is zero, thenerrno
will indicate whether it was really zero, or was there an error. In this case simply checking dmon to be between 1 and 12 inclusive will do the trick, as the zero value is not allowed.
There are more things, but anyway, generally I would recommend you to create tiny functions to do specific tasks: instead of having mallocs/snprints/scanfs all mixed together, write functions with minimal objectives which you can ensure that they don't contain bugs/memory buffer overflow. Then you are safe to use them in your main() function.
Adding to comments from @dacap:
- Open bracket in functions normall starts at column 0
- be kind to readers on small screens/windows: restrict lines to 80 chars
lastline()
filepath
should beconst
- should the error message on failure to open the file be to
stderr
? - message lacks
\n
size=fseek
assignment in condition geta compiler warning and is unnecessary. The value is not used. Just checkif (fseek(...) < 0)
Negative
fsize
andfseek
is too clever by half. Results in a redundant call tofseek
if file size <= 4096. Preferfsize = GetFileSize(fp); if (fsize > (long) sizeof buff) { if (fseek(fp, fsize - sizeof buff, SEEK_SET) < 0) { perror("cannot seek"); exit(EXIT_FAILURE); } }
Use
EXIT_FAILURE
instead of0
for an exit code.- Only one of
size
,fsize
andi
is needed. sizeof(char)
is 1 by definitionif (buff[i])=='\n')
accesses outside the buffer if file size is zero
main()
- Types are normally defined at file scope.
- string type is pointless
- creation of filename allocates a buffer and then assumes its size is 4096
- creation of filename should be a function
- snprintf return value needs checking to make sure the string was not truncated
use
PATH_MAX
:static char * make_filename(const char *name) { char *path = malloc(PATH_MAX+1); if (snprintf(path, PATH_MAX, "/home/...", name) >= PATH_MAX) { ... /* error handling */ } return path; }
No idea whether the
scanf
calls are ok. Prefer to see them hidden away in a helper function.p = strchr(field_cause, '_');
should be later, where p is used.while (p != NULL)
should beif (...)
. Bizarre.- cause_string leaks
- Why not just print what you want instead of
malloc
ing a buffer and then printing it? Really strange. - ... baling out there. Too many problems.
An even quicker way to find buffer overflow or memory issues would be to use something like valgrind (Linux, free, open source) or Purify (multiplatform, not free)
Im not suggesting that this is meant to replace a good code-review, but it should serve as an additional helper. After all, we all are only human, and even the best of us miss things in the code review. With a good set of requirements and good understanding of the problem, it should be possible to create a robust unit test-suite (I prefer test driven design) that should catch many/most issues.
A static code analyzer, like coverity or cccc (more oriented towards sw metrics) is often helpful as well. At work, we have both coverity and valgrind integrated into our nightly builds and unit testing.
-
\$\begingroup\$ That only finds actual overflows not potential overflows and thus you need comprehensive test data which is imposable to get before the fact. Thus valgrind is great when you have input that is causing a problem but of only limited use otherwise. \$\endgroup\$Loki Astari– Loki Astari2012年05月20日 16:55:28 +00:00Commented May 20, 2012 at 16:55
-
\$\begingroup\$ @LokiAstari, that's the first time Ive ever heard someone call Valgrind "limited" (and I dont think suggesting its use is worth a down vote). I updated my answer with more details as to why I think its helpful. With a good understanding of the problem, it shouldnt be so difficult to get a decent data-set. Thanks at least for helping me to improve my answer. \$\endgroup\$Brady– Brady2012年05月20日 18:39:20 +00:00Commented May 20, 2012 at 18:39
-
\$\begingroup\$ Valgrind's memcheck is limited in a way because it lulls you into a false sense of security. Memcheck allows you to get extra output from unit tests. Whatever you don't unit test, memcheck won't catch. No magic there. A static tester uses formal methods to ensure that the leak/invalid access won't happen no matter what the input was. \$\endgroup\$Kuba hasn't forgotten Monica– Kuba hasn't forgotten Monica2012年06月14日 04:07:46 +00:00Commented Jun 14, 2012 at 4:07