I would appreciate any opinions and critiques anyone has on my code.
My code allow you to read a ini file. parser.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "parser.h"
// read a the whole line, stop if encouter eof
static char *readline(FILE *in, sip_error *error)
{
size_t size = DEFAULT_LINE_SIZE;
char *str;
if((str = malloc(sizeof *str * size)) == NULL) {
*error = ERROR_MALLOC;
return NULL;
}
char *pos;
char *temp_alloc;
char lread = 0;
while(fgets(str + size - DEFAULT_LINE_SIZE, DEFAULT_LINE_SIZE, in)) {
lread = 1;
if((pos = strchr(str + size - DEFAULT_LINE_SIZE, '\n')) != NULL) {
*pos = '0円';
break;
}
size = size + DEFAULT_LINE_SIZE - 1;
temp_alloc = realloc(str, sizeof *str * size);
if(temp_alloc == NULL) {
*error = ERROR_MALLOC;
free(str);
return NULL;
}
}
if(!lread) {
free(str);
return NULL;
}
return str;
}
// strip line for extra spaces and comments
static void line_strip(char *str)
{
char *iterstr = str;
char *sstr, *estr;
while(*iterstr && isspace(*iterstr))
++iterstr;
sstr = iterstr;
while(*iterstr) {
if(!isspace(*iterstr)) {
estr = iterstr;
if(*estr == '#' || *estr == ';') {
--estr;
break;
}
}
++iterstr;
}
*(estr+1) = '0円';
memmove(str, sstr, estr - sstr + 2);
estr = str + (estr - sstr);
while(estr > str && isspace(*estr))
--estr;
*(estr+1) = '0円';
}
void sip_parse_file(FILE *file, void (*handler)(enum line_type type,
char *key_section, char *value), sip_error *error)
{
char *section;
char *temp_sec;
char *line;
char *pos;
char *key, *value;
size_t ssize = DEFAULT_SECTION_SIZE;
size_t size;
if((section = malloc(sizeof *section * ssize)) == NULL) {
*error = ERROR_MALLOC;
return;
}
while((line = readline(file, error)) != NULL) {
if(*error)
goto FREE;
// the line is a new section
line_strip(line);
if(line[0] == '[') {
size = strlen(line);
if(size > ssize) {
if((temp_sec = realloc(section,
ssize += ssize - size)) == NULL)
{
*error = ERROR_MALLOC;
goto FREE;
}
section = temp_sec;
}
if(line[size - 1] == ']') {
line[size -1] = '0円';
strcpy(section, line + 1);
handler(e_section, section, NULL);
}
else *error = ERROR_PARSE;
}
// key_value pair
else if((pos = strchr(line, '=')) != NULL) {
*(pos - 1) = '0円';
key = line;
value = pos + 1;
line_strip(key);
line_strip(value);
handler(e_key_value, key,
value);
}
else if(line[0] != '0円')
*error = ERROR_PARSE;
FREE:
free(line);
if(*error)
break;
}
free(section);
}
parser.h
enum line_type {
e_section,
e_key_value,
};
typedef unsigned int sip_error;
#define ERROR_MALLOC 1
#define ERROR_PARSE 2
#define DEFAULT_SECTION_SIZE 32
#define DEFAULT_LINE_SIZE 64
void sip_parse_file(FILE *file, void (*handler)(enum line_type type,
char *key_section, char *value), sip_error *error);
Example of use:
#include <stdio.h>
#include "parser.h"
// this is our handler that intercept each section|key-value when its read from the ini file
// we just print what we read from the ini file to the standard output.
void handler(enum line_type type, char *key_section, char *value)
{
// type indicate that we have a section name
if(type == e_section)
printf("section : %s", key_section);
// type indicate that we have key-value
else
printf("key = %s\nvalue = %s\n", key_section, value);
putchar('\n');
}
int main(int argc, char **argv)
{
// we use a variable error to check wheter an error happened while reading the ini file
sid_error error = 0;
sip_parse_file(stdin, handler, &error);
if(error)
printf("error ocured while reading ini file");
return error;
}
-
8\$\begingroup\$ Thanks for sharing your code! Would you mind editing a description of what it does? It makes life easier on reviewers and you'll likely get a better answer if people understand at a high level what your code is supposed to do. \$\endgroup\$RubberDuck– RubberDuck2015年01月26日 20:27:19 +00:00Commented Jan 26, 2015 at 20:27
-
\$\begingroup\$ in most systems, there is a library function: 'readline()' that handles (almost) any length input line. I suggest using it (if available) rather than generating a function that partially replaces it. \$\endgroup\$user3629249– user36292492015年01月27日 19:50:53 +00:00Commented Jan 27, 2015 at 19:50
-
\$\begingroup\$ this line: 'temp_alloc = realloc(str, sizeof *str * ssize);' allocates room for an array of char pointers with size entries , where ssize is a multiple of 64. This is probably not what you want. There are numerous other errors, including some more of this same kind of error \$\endgroup\$user3629249– user36292492015年01月27日 21:51:16 +00:00Commented Jan 27, 2015 at 21:51
-
\$\begingroup\$ sizeof *str == sizeof(char) == 1 \$\endgroup\$KarimS– KarimS2015年01月27日 21:59:24 +00:00Commented Jan 27, 2015 at 21:59
4 Answers 4
Design suggestions:
I'll have to agree with what was mentioned elsewhere about the memory allocation strategy. I don't like the possibility of several malloc
s/realloc
s being called in a single run. The way I see it, you have two main options: either optmize for speed or for memory usage. Currently you have neither.
Frequent reallocation will be slower and will fragment the heap. In a memory constrained environment, fragmenting the little memory you have is the last thing you want, since it just creates dead unusable space.
I would suggest that you either load the whole file up-front, also simplifying logic a lot in the process by getting rid of the allocation/free logic or, use a fixed size stack allocated memory buffer, something like:
char line_buffer[512];
while (fgets(line_buffer, sizeof(line_buffer), file))
{
...
}
Then you will only need some simple logic to call fgets
again if a line can't fit in the buffer. Lines of text are usually short, so with a big enough buffer, chances are that you will not have to repeat a file IO very often for a single line.
Loading the file entirely into memory is of course even simpler and probably the fastest alternative, since you would be performing just one read operation.
Code review:
A few things you could change/improve in this code:
Right now, your functions aren't doing any input parameter validation. If the caller passes a null pointer or a closed file, your code will crash silently. Adopt an input validation strategy. You can validate the inputs and return an error or at least
assert
to help the debugging process.I find short names, such as these:
char *sstr, *estr;
a little confusing and hard to read.
start_ptr
/end_ptr
orstr_start
/str_end
would read easier. Same goes forssize
, which I think stands forsection_size
.I would recommend always adding a pair of
{ }
on flow-control statements, even for the single line ones. That simplifies maintenance and shields your code from ambiguity caused by bad indentation.The
goto
inside thewhile
loop insip_parse_file()
is hard to follow. It also looks dangerous because the jump target is inside the loop, with more code after it. I think It would be a good idea putting some effort into replacing it with something more structured. Separating some tasks into local helper functions might help.The
line_type
enum could be turned into atypedef enum
to avoid having to typeenum line_type
everywhere. E.g.:typedef enum { e_section, e_key_value, } line_type;
ERROR_MALLOC
,ERROR_PARSE
names are easy to collide with other libraries. A name prefix wouldn't be a bad idea. Same goes for the other constants.Your header file seems to be missing an Include Guard.
It has a bug.
// key_value pair else if((pos = strchr(line, '=')) != NULL) { *(pos - 1) = '0円'; key = line; value = pos + 1;
If there no space between de key and the equal sign it eats a byte from key.
readline()
will read the entire FILE
should it encounter an embedded '0円'
as if((pos = strchr(str + size - DEFAULT_LINE_SIZE, '\n'))
will not break the loop. Better to continue loop if '\n'
exist and the buffer just read is max length.
Instead of doing lots of reallocs, read the complete file into a buffer - which you allocate after getting the file size. Once you have the file in the buffer you can easily search for \n
to determine the start and of lines.
e.g.
#include <stdio.h>
#include <stdlib.h>
FILE* fp = fopen( "yourfile.ini", "rb" ); // open in binary to avoid \n translation
if ( fp != NULL )
{
fseek( fp, 0L, SEEK_END );
size_t size = ftell(fp);
fseek( fp, 0L, SEEK_SET );
char* fileContent = malloc( size );
if (fileContent != NULL )
{
fread( fileContent, 1, size, fp );
...
EDIT
to the actual code review:
writing sizeof *str * size
to calculate size is unnecessary and is hard to read, just write size. i.e. malloc(size)
initialize all variables, one day you may change the order of the statements and then you may end up with uninitialized variable during runtime
BTW I don't think your code works, when realloc
is successful you never set str to point to the new buffer i.e. temp_alloc so by next fgets on the same line (by lines larger than DEFAULT_LINE_SIZE
) you are in uncharted territory.
-
2\$\begingroup\$ you don't consider here if the memory is limited, and if the file is large. \$\endgroup\$KarimS– KarimS2015年01月27日 10:34:11 +00:00Commented Jan 27, 2015 at 10:34
-
\$\begingroup\$ true. but then OTOH u didn't mention that as requirement and besides having lots of reallocs that may fail in the middle is not the WTG when it comes to having limited memory. You can optimize memory size by using the buffer for the file and have pointers into it. \$\endgroup\$AndersK– AndersK2015年01月27日 12:55:14 +00:00Commented Jan 27, 2015 at 12:55