This is some code I've cobbled together that reads a delimited file. If the user is able to tell us some stuff about the file, it uses the supplied information. Otherwise, it tries to work stuff out itself. Then it pushes what it thinks are the first two rows of data back to the user to see if it's read the file correctly.
The problem I've faced is that efficiency is important, so I've tried to only read the file once. That creates a chicken and egg problem if the user hasn't given us a delimiter - we need to read the first row to determine the delimiter, but we can't get the fields in the first row without knowing what that delimiter is.
What I've written works but it's an awful mess in terms of readability. I've added comments for this SE question to help explain some decisions. Can you help me make the code clearer and cleaner while still keeping it as quick as possible?
[RequireHttps]
public class HomeController : BaseController
{
private IFileParser _fileParser;
public HomeController(IFileParser fileParser)
{
_fileParsers = fileParser; // this is handled via Ninject
}
// other ActionResults
public ActionResult NewJob(Job job)
{
if (ModelState.IsValid && Request.Files.Count > 0)
{
HttpPostedFileBase file = Request.Files[0];
if (file != null && file.ContentLength > 0)
{
// FileQueue is a DTO - you can ignore this constructor
FileQueue fQ = new FileQueue(file);
string[] firstline = null, secondline = null;
if (Request["AutoDetect"] == null)
{
// these will either all be blank or all populated
fQ.FieldDelimiter = Request["FieldDelimiter"];
fQ.TextDelimiter = Request["TextDelimiter"];
}
_fileParser.ParseFirstTwoRows(fQ, ref firstline, ref secondline);
if (fQ.TextDelimiter == "")
{
if (_fileParser.IsFileDoubleQuoteDelimited(firstline) || _fileParser.IsFileDoubleQuoteDelimited(secondline))
{
fQ.TextDelimiter = "\"";
}
}
fQ.NumberOfFields = firstline.Count();
fQ.FirstLine = firstline;
fQ.SecondLine = secondline;
return RedirectToAction("FieldMapping", fQ);
}
}
return View("Error");
}
}
public class FileParser : IFileParser
{
// other file parsing functions
public void ParseFirstTwoRows(FileQueue file, ref string[] firstRow, ref string[] secondRow)
{
// StopWatch tests suggest TextFieldParser appears to be marginally faster than a StreamReader
using (TextFieldParser parser = new TextFieldParser(file.Path))
{
parser.HasFieldsEnclosedInQuotes = false;
if (file.TextDelimiter == "\"")
{
parser.HasFieldsEnclosedInQuotes = true;
}
if (file.FieldDelimiter == null)
{
file.FieldDelimiter = ParseFirstRow(parser.ReadLine(), ref firstRow);
parser.Delimiters = new string[] { file.FieldDelimiter };
}
else
{
parser.Delimiters = new string[] { file.FieldDelimiter };
firstRow = parser.ReadFields();
}
secondRow = parser.ReadFields();
}
}
public string ParseFirstRow(string firstLine, ref string[] firstRow)
{
firstLine = firstLine.Trim('\"', '\'');
if (firstLine.Contains('|'))
{
firstRow = Regex.Split(firstLine, "[\"']*\\|[\"']*");
return "|";
}
else if (firstLine.Contains(','))
{
firstRow = Regex.Split(firstLine, "[\"']*,[\"']*");
return ",";
}
else if (firstLine.Contains('\t'))
{
firstRow = Regex.Split(firstLine, "[\"']*\t[\"']*");
return "\t";
}
firstRow = Regex.Split(firstLine, "[\"']*[:;@#~\\.&_\t,-]*[\"']*");
return "";
}
public bool IsFileDoubleQuoteDelimited(string[] lineText)
{
string line = string.Join(" ", lineText);
int countOfChar = line.Length - line.Replace("\"", "").Length
if (countOfChar == (lineText.Count() * 2))
{
return true;
}
return false;
}
}
-
1\$\begingroup\$ Is this how your code is organised in the real thing or have you moved it all to the controller to make it easier to post? \$\endgroup\$RobH– RobH2016年04月27日 11:12:03 +00:00Commented Apr 27, 2016 at 11:12
-
\$\begingroup\$ @RobH I've moved it to make it easier to post. In the real code the Controller has the NewJob function and all the other functions are in a separate FileParser class which is injected into the controller. \$\endgroup\$Bob Tway– Bob Tway2016年04月27日 11:14:14 +00:00Commented Apr 27, 2016 at 11:14
-
\$\begingroup\$ @MattThrower You shouldn't do that, because "you should split those methods into separate classes" is a valid review, but it's not applicable to your real code. \$\endgroup\$svick– svick2016年04月30日 13:51:57 +00:00Commented Apr 30, 2016 at 13:51
-
\$\begingroup\$ @svick Fair enough. I have classed the code as requested. I'd dearly love to put a bounty on this, as it seems a great learning opportunity, but I can't afford it :( \$\endgroup\$Bob Tway– Bob Tway2016年05月03日 13:36:21 +00:00Commented May 3, 2016 at 13:36
1 Answer 1
Its a little bit hard to follow the ActionResult NewJob(Job job)
method but this can be fixed by using a guard condition like so
if (!ModelState.IsValid || Request.Files.Count == 0)
{
return View("Error");
}
HttpPostedFileBase file = Request.Files[0];
if (file == null || file.ContentLength == 0)
{
return View("Error");
}
// the remaining code
but this will still have some small code duplication. So why don't we add a method using the TryGet
pattern like so
private bool TryGetFile(out HttpPostedFileBase file)
{
file = null;
if (!ModelState.IsValid || Request.Files.Count == 0)
{
return false;
}
file = Request.Files[0];
return file != null && file.ContentLength > 0;
}
and use it like so
HttpPostedFileBase file;
if (!TryGetFile(out file))
{
return View("Error");
}
You really have to work on naming things. Let us stay at the NewJob()
method.
- methods should be named using a verb or verb phrase.
- don't use abbreviations for naming things. Abbreviations will lead to code which is hard to read and to maintain.
- if you are dealing with a collection you should use the plural form of the name. See:
string[] firstline = null;
. - declaring multiple variables on the same line reduces the readability