For fun, I've made a practical file IO class in C++ with the purpose of being light and quick, and extendable for custom parsers. Here is the code:
parser.h
//
// parser.h
//
// Created by Taylor Shuler on 4/8/15.
//
//
#ifndef _parser
#define _parser
#include <fstream>
#include <iostream>
#include <list>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
using namespace std;
class Parser {
public:
// constructors
/// default
Parser();
/// copy
Parser(const Parser &p);
/// custom
Parser(string filename);
// destructor
~Parser();
// public members
string getFilename();
list<string> getContent();
// operators
Parser& operator=(const Parser &p);
Parser& operator-=(const Parser &p);
Parser& operator+=(const Parser &p);
bool operator==(const Parser &p) const;
bool operator!=(const Parser &p) const;
bool operator<(const Parser &p) const;
bool operator>(const Parser &p) const;
bool operator<=(const Parser &p) const;
bool operator>=(const Parser &p) const;
friend Parser operator+(const Parser &p1, const Parser &p2);
friend Parser operator-(const Parser &p1, const Parser &p2);
friend ostream& operator<<(ostream &outstream, const Parser &p);
//friend istream& operator>>(istream &instream, const Parser &p);
protected:
// shared members
void allocFilename(string filename, bool freeMemory = false);
private:
// attributes
char* filename;
list<string> content;
};
#endif /* defined(_parser) */
parser.cpp
//
// parser.cpp
//
// Created by Taylor Shuler on 4/8/15.
//
//
#include "parser.h"
void panic(string msg) {
cout << "ERROR: " << msg << endl;
exit(EXIT_FAILURE);
}
void Parser::allocFilename(string Filename, bool freeMemory) {
if (freeMemory) {
delete[] filename;
}
filename = new char[Filename.size()];
strcpy(filename, Filename.c_str());
}
Parser::Parser() {
allocFilename("");
}
Parser::Parser(string Filename) {
allocFilename(Filename);
ifstream file(Filename);
if (file) {
// we have one
string line;
while (getline(file, line)) {
content.push_back(line);
}
} else {
// we don't have one
panic("Could not find the file " + Filename + "!");
}
}
Parser::Parser(const Parser &p) {
// set the filename
allocFilename(p.filename, true);
// clear the destination for the text
content.clear();
// copy p's content over
for (auto it = p.content.begin(); it != p.content.end(); it++) {
content.push_back(*it);
}
}
Parser::~Parser() {
delete[] filename;
content.clear();
}
string Parser::getFilename() {
return filename;
}
list<string> Parser::getContent() {
return content;
}
Parser& Parser::operator=(const Parser &p) {
if (this == &p) {
return *this;
}
// set the new filename
allocFilename(p.filename);
// make room for the new content
content.clear();
// add in p's content
for (auto it = p.content.begin(); it != p.content.end(); it++) {
content.push_back(*it);
}
return *this;
}
Parser& Parser::operator-=(const Parser &p) {
for (auto it = p.content.begin(); it != p.content.end(); it++) {
content.remove(*it);
}
return *this;
}
Parser& Parser::operator+=(const Parser &p) {
for (auto it = p.content.begin(); it != p.content.end(); it++) {
content.push_back(*it);
}
return *this;
}
bool Parser::operator==(const Parser &p) const {
return content == p.content;
}
bool Parser::operator!=(const Parser &p) const {
return content != p.content;
}
bool Parser::operator<(const Parser &p) const {
return content < p.content;
}
bool Parser::operator>(const Parser &p) const {
return content > p.content;
}
bool Parser::operator<=(const Parser &p) const {
return content <= p.content;
}
bool Parser::operator>=(const Parser &p) const {
return content >= p.content;
}
Parser operator+(const Parser &p1, const Parser &p2) {
Parser sum(p1);
for (auto it = p2.content.begin(); it != p2.content.end(); it++) {
sum.content.push_back(*it);
}
return sum;
}
Parser operator-(const Parser &p1, const Parser &p2) {
Parser diff(p1);
for (auto it = p2.content.begin(); it != p2.content.end(); it++) {
diff.content.remove(*it);
}
return diff;
}
ostream& operator<<(ostream &outstream, const Parser &p) {
for (auto it = p.content.begin(); it != p.content.end(); it++) {
outstream << *it << endl;
}
return outstream;
}
main.cpp
//
// main.cpp
// parser
//
// Created by Taylor Shuler on 4/8/15.
//
//
#include "parser.h"
int main() {
Parser p1("test.txt"), p2("test2.txt"), p3, hr("hr.txt");
p3 = p1 + p2;
p3 -= hr;
cout << p3;
}
Are there any ways that it could be improved? I am using C++11, so I've leveraged the usage of the auto
keyword to remove excessive list<string>::iterator
expressions.
-
\$\begingroup\$ What is the language that you are parsing? \$\endgroup\$200_success– 200_success2015年04月09日 23:11:11 +00:00Commented Apr 9, 2015 at 23:11
-
\$\begingroup\$ It's not a specific language source file I'm parsing, if that's what you're asking. It just takes in any file and reads in its text. You can check out the full project and built executable on the GitHub project: github.com/T145/parser/tree/master/parser \$\endgroup\$T145– T1452015年04月10日 00:09:53 +00:00Commented Apr 10, 2015 at 0:09
-
2\$\begingroup\$ So it's really a file reader, not a parser? \$\endgroup\$200_success– 200_success2015年04月10日 00:12:23 +00:00Commented Apr 10, 2015 at 0:12
-
2\$\begingroup\$ A "parser" usually involves a "lexer", which defines "tokens" ...the act of "parsing" implies somewhat making programmatic sense out of some input. \$\endgroup\$Mathieu Guindon– Mathieu Guindon2015年04月10日 00:57:35 +00:00Commented Apr 10, 2015 at 0:57
-
\$\begingroup\$ It basically is useful for text management. This is the core, and from it other custom parsers can be created to actually do things with the content that the class stores. This isn't literally a parser, but easily can be. I can implement a literal parsing functionality just fine on my own; I just wanted to be sure I was on the right track. \$\endgroup\$T145– T1452015年04月10日 01:13:32 +00:00Commented Apr 10, 2015 at 1:13
2 Answers 2
Let's start with the obvious:
using namespace std
in the header file, very bad. If you've been looking around our site for a few months you should see this suggestion come up quite often, if not, then read up on the discussion and learn about it.Again something that comes up a lot, a global identifier starting with underscore. In this case it is aggravated for being a macro and with a common name used by libraries:
#ifndef _parser #define _parser
Read up about the reserved C++ identifiers. As a side recommendation, use
ALL_UPPERCASE
for#defines
to differentiate them from actual C++ entities (the preprocessor is a mini-language inside C++).Lots of unnecessary includes here:
#include <stdio.h> #include <stdlib.h> #include <string.h>
This is not a C program, but when you do need a C header, you should reference it using its C++ alias, that is, starting with a
c
and with no.h
, e.g.:<cstdlib>
. But you don't need any of those anyway if you are writing clean C++. You do seem to need<string>
(the C++std::string
header, not the same as<string.h>
), which is missing.This was already mentioned in a OP comment:
Parser
is a bad name for a class that simply splits a source file into lines. Parsing is the process of "analysing a string of symbols, either in natural language or in computer languages, conforming to the rules of a formal grammar". In simpler terms, parsing means making sense of a set of symbols and/or text. Your class is pretty far from that. Perhaps something likeTextFileReader
would be a more honest name.
The less obvious things and some nitpicking:
From a first glance, it looks strange that your class has so many operators for arithmetic and comparison. Since each "Parser" is just a container to the lines of a file, does it make sense to be able to add/subtract instances? Doesn't make much to me. Same for the comparisons. I can relate to comparing files for equality, but for
<= | >=
ordering has never occurred to me. Not by contents, at least.Why do you use a
char*
for thefilename
is beyond my understanding, specially when the method that clones it already takes its input as astd::string
. Storing it as a C++ string would be just so much simpler.exit
ing a C++ program is about as crude as it gets when it comes to error handling. Please just throw an exception:void panic(const std::string & msg) { throw std::runtime_error("Error: " + msg + "\n"); }
The ideal would also be defining a custom exception type derived from
std::runtime_error
, if you want to refine it further.You can use range-based for with C++11 to further simplify code like this:
for (auto it = p.content.begin(); it != p.content.end(); it++)
Range-based:
for (const auto & line : p.content) { /* `line` is a string */ }
Why do you have a
protected
section in your class? No signs of inheritance around, so all the internal details should beprivate
.getFilename()
andgetContent()
should also beconst
since they don't (and shouldn't) mutate member data.A
std::list
is probably going to have inferior performance if compared to astd::vector
in this case due to poor data locality. With a list, each line is going to be allocated as a separate memory block (think about list nodes). Of course that without measuring it is impossible to say if this is a performance problem to your application, but at any rate, I would always start with avector
and only upgrade to other structures if there is a compelling reason.
-
\$\begingroup\$ The only issue I have with using the
vector
over alist
is because thevector
doesn't offer the same utility functions to manage the objects. My biggest complaint is that forvector
, you have to utilize an erase-remove idiom, while with a list you can just callremove
. Is there something that's not as "clunky" as a list but just as good as a vector in your opinion? \$\endgroup\$T145– T1452015年04月11日 06:04:33 +00:00Commented Apr 11, 2015 at 6:04 -
\$\begingroup\$ @T145, in that case, I don't think there is anything else. You might take a look at
std::deque
, which is something in between a list and a vector, though the interface is closer to a vector. \$\endgroup\$glampert– glampert2015年04月11日 06:38:04 +00:00Commented Apr 11, 2015 at 6:38 -
\$\begingroup\$ Check out the edit and let me know what you think! \$\endgroup\$T145– T1452015年04月12日 01:08:49 +00:00Commented Apr 12, 2015 at 1:08
#ifndef _parser #define _parser
Identifiers that begin with a single underscore are semi-reserved. It would be better to pick something different. How about INCLUDE_PARSER_H
?
#include <stdio.h> #include <stdlib.h> #include <string.h>
You should be including the C++ versions of these headers, namely<cstdio>
, <cstdlib>
, and <cstring>
.
using namespace std;
Please don't do this in a header file, as it pollutes the global namespace. See https://stackoverflow.com/questions/1452721/why-is-using-namespace-std-considered-bad-practice There's not much impact to qualifying each std::string
and the sole std::list
.
friend Parser operator+(const Parser &p1, const Parser &p2); friend Parser operator-(const Parser &p1, const Parser &p2);
Why are these declared as friend
? The LHS is a Parser
, they can be just regular operator methods:
Parser operator+(const Parser &rhs);
Parser operator-(const Parser &rhs);
void panic(string msg) { cout << "ERROR: " << msg << endl; exit(EXIT_FAILURE); }
the class Parser
is not the application itself; it's included in another project. It's a violation of object-oriented design for a class to be able to kill the whole process. Parser
doesn't own the process; Parser
is used by the process and shouldn't be able to kill the process. How about throw
ing an exception instead?
filename = new char[Filename.size()]; strcpy(filename, Filename.c_str());
This is a buffer overrun. Filename.size()
does not include space for a terminating '0円'
. strcpy()
expects there to be space to put in a terminating '0円'
.
Also, why are you using this hybrid string
/char *
approach? What's wrong with a string
member for the filename?
Parser::Parser(const Parser &p) { // set the filename allocFilename(p.filename, true); // clear the destination for the text content.clear();
- passing
true
to allocFilename causes it to callfree(filename)
which is uninitialized at this point. content.clear()
is pointless. You're constructing a new object instance. Thestd::list
starts out empty.
Parser::~Parser() { delete[] filename; content.clear(); }
What do you hope to accomplish by calling content.clear()
? The automatic destructor will do that anyway.
I get the feeling you're doing a lot of things because you've seen other code using it, without understanding why. This is a dangerous way to learn how to program, as you don't really have ownership of your code.
-
\$\begingroup\$ So do I even need to
delete
anything? Would any leaks occur if I just removed theallocFilename
function in favor of simplefilename = std::string
? \$\endgroup\$T145– T1452015年04月10日 16:31:28 +00:00Commented Apr 10, 2015 at 16:31 -
\$\begingroup\$ You need to balance every
new
with a call todelete
(and everynew[..]
with a call todelete[]
. Anything else, as long as the object goes out of scope the destructor will take care of it. That's the contract of using objects that C++ gives you. \$\endgroup\$Snowbody– Snowbody2015年04月11日 02:27:02 +00:00Commented Apr 11, 2015 at 2:27 -
\$\begingroup\$ @T145 I have no idea what you mean by
filename = std::string
. That doesn't make sense. The LHS is a variable and the RHS is a type (a class). You just need to have thestd::string
as a member variable, declared similarly tostd::string filename;
. \$\endgroup\$Snowbody– Snowbody2015年04月11日 02:28:04 +00:00Commented Apr 11, 2015 at 2:28 -
\$\begingroup\$ Ik; I just had that representing an object type, such as
filename = Filename
from my source, whereFilename
is of thestd::string
type. Thanks for the help btw! \$\endgroup\$T145– T1452015年04月11日 06:00:33 +00:00Commented Apr 11, 2015 at 6:00 -
\$\begingroup\$ Check out the edit and let me know what you think! \$\endgroup\$T145– T1452015年04月12日 01:08:56 +00:00Commented Apr 12, 2015 at 1:08