Based on my initial implementation found here and the suggested improvements in answers and comments, I've refactored the code.
The main changes are:
- Removed redundant buffering.
- Simplified constructors by implementing a
CsvWriterSettings
class. - Refactored
CsvWriter<T>.WriteLine(T value)
to make it more readable with the use of a private nested classCsvFieldWriter
.
The code as it stands now is the following:
public sealed class CsvWriter<T> : MarshalByRefObject, IDisposable
{
private const char EscapeCharacter = '\\';
private const char EscapedSeparatorCharacter = 's';
private bool isDisposed;
private Stream output;
private readonly Encoding encoding;
private readonly bool escapeListSeparatorInsideField;
private readonly bool escapeNewLineCharactersInsideField;
private readonly List<char> ignoredCharacters;
private readonly char listSeparatorCharacter;
private readonly IEnumerable<Func<T, string>> projections;
private readonly CsvFieldWriter fieldWriter;
private readonly StringBuilder buffer;
private object syncObject;
public CsvWriter(Stream output, IEnumerable<Func<T, string>> projections, CsvWriterSettings settings = null)
{
if (output == null)
throw new ArgumentNullException("output");
if (projections == null)
throw new ArgumentNullException("projections");
if (!output.CanWrite)
throw new ArgumentException("Can not write to specified stream.", "output");
var csvSettings = settings ?? new CsvWriterSettings(Encoding.Default);
this.output = output;
this.projections = projections;
this.listSeparatorCharacter = csvSettings.ListSeparatorCharacter;
this.encoding = csvSettings.OutputEncoding;
this.ignoredCharacters = (List<char>)csvSettings.IgnoredCharacters;
this.escapeListSeparatorInsideField = csvSettings.EscapeListSeparatorInsideField;
this.escapeNewLineCharactersInsideField = csvSettings.EscapeNewLineCharactersInsideField;
this.fieldWriter = new CsvFieldWriter(this);
this.buffer = new StringBuilder();
this.syncObject = new object();
}
public bool EscapeListSeparatorInsideField { get { return this.escapeListSeparatorInsideField; } }
public bool EscapeNewLineCharactersInsideField { get { return this.escapeNewLineCharactersInsideField; } }
public IEnumerable<char> IgnoredCharacters { get { return this.ignoredCharacters; } }
public char ListSeparatorCharacter { get { return this.listSeparatorCharacter; } }
public void WriteLine(T value)
{
if (value == null)
throw new ArgumentNullException("value");
Debug.Assert(this.projections != null);
Debug.Assert(this.fieldWriter != null);
Debug.Assert(this.encoding != null);
Debug.Assert(this.buffer != null);
Debug.Assert(this.output != null);
lock (syncObject)
{
if (this.isDisposed)
throw new ObjectDisposedException(this.GetType().Name);
foreach (var projection in this.projections)
{
if (projection != null)
{
buffer.Append(this.fieldWriter.WriteField(projection(value) ?? string.Empty));
buffer.Append(this.ListSeparatorCharacter);
}
}
this.buffer.Length--;
this.buffer.AppendLine();
this.output.Write(this.encoding.GetBytes(this.buffer.ToString()), 0, this.buffer.Length);
this.buffer.Clear();
}
}
public void WriteLines(IEnumerable<T> values)
{
if (values == null)
throw new ArgumentNullException("values");
foreach (var value in values)
{
WriteLine(value);
}
}
public void Dispose()
{
if (this.isDisposed)
return;
lock (syncObject)
{
if (this.isDisposed)
return;
Debug.Assert(this.buffer.Length == 0);
this.output = null;
this.isDisposed = true;
GC.SuppressFinalize(this);
}
}
private class CsvFieldWriter
{
private readonly CsvWriter<T> csvWriter;
private readonly StringBuilder fieldBuffer;
private readonly bool escapeCharacters;
public CsvFieldWriter(CsvWriter<T> csvWriter)
{
this.csvWriter = csvWriter;
this.fieldBuffer = new StringBuilder();
this.escapeCharacters = csvWriter.escapeNewLineCharactersInsideField || csvWriter.escapeListSeparatorInsideField;
}
public string WriteField(string field)
{
Debug.Assert(csvWriter != null);
Debug.Assert(field != null);
Debug.Assert(fieldBuffer != null);
fieldBuffer.Clear();
if (this.escapeCharacters || csvWriter.ignoredCharacters.Count > 0)
{
foreach (var c in field)
{
if (c == CsvWriter<T>.EscapeCharacter && this.escapeCharacters)
{
fieldBuffer.Append(CsvWriter<T>.EscapeCharacter);
fieldBuffer.Append(c);
}
else if ( c == csvWriter.listSeparatorCharacter && csvWriter.escapeListSeparatorInsideField)
{
fieldBuffer.Append(CsvWriter<T>.EscapeCharacter);
fieldBuffer.Append(CsvWriter<T>.EscapedSeparatorCharacter);
}
else if (csvWriter.ignoredCharacters.Contains(c))
{
continue;
}
else if (c == '\n' && csvWriter.escapeNewLineCharactersInsideField)
{
fieldBuffer.Append(CsvWriter<T>.EscapeCharacter);
fieldBuffer.Append('n');
}
else if (c == '\r' && csvWriter.escapeNewLineCharactersInsideField)
{
fieldBuffer.Append(CsvWriter<T>.EscapeCharacter);
fieldBuffer.Append('r');
}
else
{
fieldBuffer.Append(c);
}
}
}
else
{
fieldBuffer.Append(field);
}
return this.fieldBuffer.ToString();
}
}
}
The CsvWriterSettings
class:
public class CsvWriterSettings
{
private readonly Encoding outputEncoding;
private readonly bool escapeListSeparatorInsideField;
private readonly bool escapeNewLineCharactersInsideField;
private readonly List<char> ignoredCharacters;
private readonly char listSeperatorCharacter;
public CsvWriterSettings(Encoding encoding = null, bool escapeNewLineCharactersInsideFields = true, bool escapeListSeparatorInsideFields = true,
IEnumerable<char> ignoredCharacters = null)
: this(CultureInfo.CurrentCulture.TextInfo.ListSeparator[0], encoding, escapeNewLineCharactersInsideFields, escapeListSeparatorInsideFields, ignoredCharacters)
{
}
public CsvWriterSettings(char listSeparator, Encoding encoding = null, bool escapeNewLineCharactersInsideFields = true, bool escapeListSeparatorInsideFields = true,
IEnumerable<char> ignoredCharacters = null)
{
this.listSeperatorCharacter = listSeparator;
this.outputEncoding = encoding ?? Encoding.Default;
this.escapeListSeparatorInsideField = escapeListSeparatorInsideFields;
this.escapeNewLineCharactersInsideField = escapeNewLineCharactersInsideFields;
this.ignoredCharacters = new List<char>();
if (ignoredCharacters != null)
{
foreach (char c in ignoredCharacters)
{
this.ignoredCharacters.Add(c);
}
}
}
public bool EscapeNewLineCharactersInsideField { get { return this.escapeNewLineCharactersInsideField; } }
public bool EscapeListSeparatorInsideField { get { return this.escapeListSeparatorInsideField; } }
public Encoding OutputEncoding { get { return this.outputEncoding; } }
public char ListSeparatorCharacter { get { return this.listSeperatorCharacter; } }
public IEnumerable<char> IgnoredCharacters { get { return this.ignoredCharacters; } }
}
In order to take advantage of the compiler's type inference I've also implemented the following static class:
public static class CsvWriter
{
public static IList<Func<T, string>> CreateEmptyProjectionList<T>(T value)
{
return new List<Func<T, string>>();
}
public static CsvWriter<T> CreateWriter<T>(T value, Stream output, IEnumerable<Func<T, string>> projections, CsvWriterSettings settings = null, int bufferLength = 512)
{
return new CsvWriter<T>(output, projections, settings);
}
}
And now, an example of how this class could be used would be:
var inDebt = from c in customers
where c.Credit < 0
select new { c.LastName, c.FirstName, c.Credit };
var projections = CsvWriter.CreateEmptyProjectionList(inDebt.FirstOrDefault());
projections.Add(c => c.LastName);
projections.Add(c => c.FirstName);
projections.Add(c => c.Credit.ToString("C"));
using (var writer = CsvWriter.CreateWriter(inDebt.FirstOrDefault(), stream, projections))
{
writer.WriteLines(inDebt);
}
Is this an acceptable approach or am I off to a bad start with this implementation?
1 Answer 1
I'd simply have a CsvWriterSettings
field in the CsvWriter
class, so I wouldn't need to do all of this:
this.listSeparatorCharacter = csvSettings.ListSeparatorCharacter;
this.encoding = csvSettings.OutputEncoding;
this.ignoredCharacters = (List<char>)csvSettings.IgnoredCharacters;
this.escapeListSeparatorInsideField = csvSettings.EscapeListSeparatorInsideField;
this.escapeNewLineCharactersInsideField = csvSettings.EscapeNewLineCharactersInsideField;
Also, the point of the CsvWriterSettings
class is to avoid a constructor with numerous parameters. Just have public properties and assign those, e.g.
var settings = new CsvWriterSettings
{
EscapeNewLineCharactersInsideField = true,
EscapeListSeparatorInsideField= false,
}
That is far clearer code that doesn't require someone to figure out which parameter of the constructor corresponds to which property.
I also wouldn't use an IEnumerable<char>
for IgnoredCharacters
, I'd go for ICollection<char>
or even IList<char>
or List<char>
.
-
\$\begingroup\$ The point of doing it this way is because
CsvWriterSettings
is immutable. I don't want anyone changing settings once the writer has been initialized; what you are proposing would allow changes to the writer's behavior between calls toWriteLine(T value)
orWriteLines(IEnumerable<T> values)
which could create inconsistant csv files. Still I agree that the code is unecesarily verbose. I should just store theCsvWriterSettings
object in a readonly field. \$\endgroup\$InBetween– InBetween2014年11月17日 09:39:07 +00:00Commented Nov 17, 2014 at 9:39 -
\$\begingroup\$ All I'm proposing is that you have a
private readonly CsvWriterSettings
which you assign via the contructor; I don't see how that one can be changed between calls. \$\endgroup\$BCdotWEB– BCdotWEB2014年11月17日 09:45:53 +00:00Commented Nov 17, 2014 at 9:45 -
\$\begingroup\$ I understand, the question is: I create a
CsvWriter
with a specificCsvWriterSettings
. Then after a while, I mutateCsvWriterSettings
. What happens to the initialized writer? Should it mutate its behavior? Should it ignore the changes? Both options are not obviously right, so I want to avoid this alltogether by makingCsvWriterSettings
immutable. You are right concernig the fields initialization, I just need oneCsvWriterSettings
field. The code stands as it does because initiallyCsvWriterSettings
was mutable and I had decided to ignore mutations once the writer was created. \$\endgroup\$InBetween– InBetween2014年11月17日 09:58:44 +00:00Commented Nov 17, 2014 at 9:58