I've implemented a config file parser in C++, which I may expand in future for personal projects. As of now the config file only supports floats, strings and booleans.
The parser is based on various if
/else
statements and string stream for data extraction.
The actual parser (conf_parser.h)
#pragma once
#include <string.h>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <iostream>
#include <unordered_map>
enum class ConfigDataType{
TYPE_FLOAT,
TYPE_BOOL,
TYPE_STRING,
TYPE_UNDEFINED,
};
//Why a union? Well if i opted for using a struct, let's say the config file had thousand's of elements, wouldn't that be wasting of free data?
union __int_m_data{
__int_m_data(){}
~__int_m_data(){} //All deallocations are handled later
__int_m_data(const __int_m_data&){}
char* m_s_data;
float m_f_data;
bool m_b_data;
};
struct ConfigData{
public:
ConfigData(const std::string& label,const char* data){
m_data.m_s_data=(char*)malloc(strlen(data)+1);
strcpy(m_data.m_s_data,data);
m_type=ConfigDataType::TYPE_STRING;
m_Fatal=false;
m_label=label;
}
ConfigData(const std::string& label,bool data){
m_data.m_b_data=data;
m_type=ConfigDataType::TYPE_BOOL;
m_Fatal=false;
m_label=label;
}
ConfigData(const std::string& label,float data){
m_data.m_f_data=data;
m_type=ConfigDataType::TYPE_FLOAT;
m_Fatal=false;
m_label=label;
}
//This is used to catch faulty issues
ConfigData(bool Fatality){
m_type=ConfigDataType::TYPE_UNDEFINED;
m_Fatal=Fatality;
}
ConfigData(){
m_type=ConfigDataType::TYPE_UNDEFINED;
m_Fatal=false;
}
ConfigDataType getType(){ return m_type; }
bool getFatality(){ return m_Fatal; }
std::string getLabel(){ return m_label; }
__int_m_data getActualData(){ return m_data; } //This is for users to actually figure out what the data was
~ConfigData(){
if(m_type==ConfigDataType::TYPE_STRING){
if(!m_data.m_s_data) //if not null
free((void*)m_data.m_s_data);
}
}
friend std::ostream& operator<<(std::ostream& os,const ConfigData& data);
private:
ConfigDataType m_type;
__int_m_data m_data;
std::string m_label;
bool m_Fatal;
};
ConfigData parseConfigLine(const std::string& Line,int line_of_file){
std::stringstream iss(Line);
std::string VariableName,Equals,Data;
iss>>VariableName>>Equals>>Data;
__int_m_data Outdata;
if(VariableName=="") //Line is empty
return ConfigData(false);
//Checking if a line has an header
if(VariableName[0]=='[' && VariableName[VariableName.length()-1]==']'){
return ConfigData(false);
}
if(VariableName[0]=='#' || Equals[0]=='#' || Data[0]=='#'){ //is an comment
return ConfigData(false);
}
if(Equals!="="){ //Some formatting issue
return ConfigData(true);
}
if(Data=="TRUE" || Data=="FALSE" || Data=="true" || Data=="false"){
return ConfigData(VariableName,(Data=="TRUE"||Data=="true")?true:false);
}
float f_data;
bool not_Float=false;
try{
f_data=std::stof(Data);
}
catch(const std::exception& e){
not_Float=true;
}
if(!not_Float){
return ConfigData(VariableName,f_data);
}
return ConfigData(VariableName,Data.c_str());
}
std::unordered_map<std::string,ConfigData> LoadConfigFile(const std::string& FileName){
unsigned int line_of_file=1;
std::unordered_map<std::string,ConfigData> configBuffer;
std::fstream fileBuff(FileName);
if(!fileBuff){
std::cout<<"Failed loading config file\n";
exit(-1);
}
std::string Line;
while(std::getline(fileBuff,Line)){
ConfigData buffData(parseConfigLine(Line,line_of_file));
if(!buffData.getFatality()){
line_of_file++;
configBuffer[buffData.getLabel()]=buffData;
continue;
}
else{
std::cout<<"ERROR: Formatting error at line"<<line_of_file<<"\n";
exit(-1);
}
}
fileBuff.close();
return configBuffer;
}
std::ostream& operator<<(std::ostream& os,const ConfigData& data){
if(data.m_type==ConfigDataType::TYPE_BOOL){
os<<data.m_data.m_b_data;
}
else if(data.m_type==ConfigDataType::TYPE_FLOAT){
os<<std::boolalpha<<data.m_data.m_f_data;
}
else if(data.m_type==ConfigDataType::TYPE_STRING){
os<<data.m_data.m_s_data;
}
else{
os<<"UNDEFINED DATA TYPE"; //Not good
}
return os;
}
Sample config file
#==================================================
# "#" is a comment
# Currently only booleans, floats, and strings are allowed
# sections as of now are only used for formatting or organizing purposes (Header must be one worded),
# eg: BOOTUP BOOT_UP etc are valid, but not "BOOT UP"
# Format: Key = Value, (Key=Value) (Key= Value) are all error prone.
# NOTE: As of now no exponential/scientific notations are allowed.
#==================================================
[BOOTUP]
RDP_SESSION = FALSE #FALSE(TRUE)/false(true) all valid
IS_HOST = true
BOOTUP_TIME = 121.99 #ms
[PHYSICS]
g = -9.81
COEFFICIENT_OF_FRICTION = 0.21 #(STATIC)=(KINETIC), ASSUME
COEFFICIENT_OF_VISCOSITY = 10 #Poise
COEFF_OF_THERM_EXP_STEEL = 0.000000000005 # 5e-12 not valid
[EFFECTS]
POST_PROCESSING = FALSE
RASTERIZATION = TRUE
M_EDIT_ID = ALPHA_001
main.cpp
#include "conf_parser.h"
#include <iostream>
int main(int argc,char** argv){
std::unordered_map<std::string,ConfigData> buff=LoadConfigFile("file.conf");
for(auto& data:buff){
if(data.second.getType()==ConfigDataType::TYPE_FLOAT){
std::cout<<data.first<<" [FLOAT]"<<": "<<data.second<<"\n";
}
else if(data.second.getType()==ConfigDataType::TYPE_BOOL){
std::cout<<data.first<<" [BOOL]"<<": "<<data.second<<"\n";
}
else if(data.second.getType()==ConfigDataType::TYPE_STRING){
std::cout<<data.first<<" [STRING]"<<": "<<data.second<<"\n";
}
}
}
-
\$\begingroup\$ Not really worth an answer, but m_Fatal but m_data is inconsistent, either capitalise the word after the underscore or not and stay consistent. \$\endgroup\$anon– anon2023年12月25日 16:22:10 +00:00Commented Dec 25, 2023 at 16:22
3 Answers 3
Here are some things that may help you improve your code.
Use include guards
There should be an include guard in each .h
file. That is, start the file with:
#ifndef CONF_PARSER_H
#define CONF_PARSER_H
// file contents go here
#endif // CONF_PARSER_H
The use of #pragma once
is a common extension, but it's not in the standard and thus represents at least a potential portability problem. See SF.8
Separate interface from implementation
It makes the code somewhat longer for a code review, but it's often very useful to separate the interface from the implementation. In C++, this is usually done by putting the interface into separate .h
files and the corresponding implementation into .cpp
files. It helps users (or reviewers) of the code see and understand the interface and hides implementation details. The other important reason is that you might have multiple source files including the .h
file but only one instance of the corresponding .cpp
file. In other words, split your existing .h
file into a .h
file and a .cpp
file.
Think carefully about the interface
The use of this interface seems a little difficult. In a real program I can imagine that I might want to retrieve the value for the coefficient of friction by doing something like cf = Config["COEFFICIENT_OF_FRICTION"];
but that is not possible withe the current implementation. If the configuration file specifies a value that the program is expected to use, it seems likely that there will be a defined list of such settings within the program somewhere. So in the case of, say, a COEFFICIENT_OF_FRICTION
, it seems likely that we would know already that we were planning to use, say, a float
for this. Also, does it make sense to simply throw away section information from the config file?
Consider the use of standards
Instead of creating your own, I'd suggest using std::variant
to contain the values for this.
-
1\$\begingroup\$ "Separate interface from implementation" is simply required. Header guards only prevent the header being included multiple times from one CPP file. The symbols will still be generated for each CPP file including this header, causing the linker to get confused about duplicate symbols. This won't be a problem at first, as the compiler will attempt to inline everything, but that in itself may have undesirable effects like bigger compiled output size. stackoverflow.com/questions/33487729/… \$\endgroup\$Daniël van den Berg– Daniël van den Berg2023年12月25日 21:41:04 +00:00Commented Dec 25, 2023 at 21:41
Everything Edward said; plus some code-level review:
union __int_m_data{
Use of double underscore within an identifier is dangerous. Such names are reserved to the implementation for any purpose (including macros, which can make for hard-to-diagnose errors).
Also, it's not normal (or helpful) to provide constructors or destructors for a union. In particular, the do-nothing copy constructor makes the getActualData()
function useless (actually dangerous, since the copy is uninitialised - no surprise it's not exercised in the example).
m_data.m_s_data=(char*)malloc(strlen(data)+1); strcpy(m_data.m_s_data,data);
It looks here that you're assuming the implementation exercises its right to create a global-namespace versions of std::malloc
and std::free
. That's not a portable assumption - use the full names. And prefer including <cstring>
over deprecated <string.h>
(and use the full names of std::strlen
and std::strcpy
).
Why are we using malloc()
rather than new
? When malloc()
fails, it returns a null pointer instead of throwing std::bad_alloc
, so we need to check that before using the result. I'd avoid the whole allocation thing (and the need to manually ensure std::free()
or delete[]
is used) by storing a std::string
instead (in a std::variant
, of course, since unions should contain only POD types).
Write some unit tests
This kind of code is well-suited to unit testing (once you remove the dependence on file names, and allow passing an open std::istream&
instead). Everything bar file opening can, and should, be tested independently in a unit-test suite.
-
\$\begingroup\$ A variable name with a double underscore is only reserved if the underscores are at the beginning of the name, not just anywhere within the name. \$\endgroup\$Mark H– Mark H2023年12月29日 00:37:34 +00:00Commented Dec 29, 2023 at 0:37
-
\$\begingroup\$ @MarkH, you're thinking of C. In C++, [lex.name] says (my emphasis): Each identifier that contains a double underscore
__
or begins with an underscore followed by an uppercase letter is reserved to the implementation for any use. \$\endgroup\$Toby Speight– Toby Speight2024年01月03日 08:06:07 +00:00Commented Jan 3, 2024 at 8:06
Don't open file as writable if you don't intend to modify it
Currently the code is not able to read a file which it does not have permission to write - quite common for global configuration files, like those under /etc on Linux. One way to fix this is by replacing fstream
with ifstream
.
Enable more warnings / use static analyzer
If compiling with -Wextra
or similar options, you would notice that the parameter line_of_file
inside parseConfigLine
is never used. This is not a real problem, but in many cases unused parameters to functions can reveal logic errors, so it's a good idea to enable as many warnings as possible (at least sometimes) or use a static analyzer like cppcheck.
-
\$\begingroup\$ Not only use
-Wextra
but also-Werror
so every warning message gets elevated to an error message and you can not compile the program without fixing the warning. Yes, it's annoying, but it forces you to not ignore warning messages and deal with them one way or the other. \$\endgroup\$infinitezero– infinitezero2023年12月26日 08:47:46 +00:00Commented Dec 26, 2023 at 8:47