Friday, April 26, 2013

String Extensions: Split Qualified

Splitting a string on a separator is easy.
Respecting qualified (quoted) strings can be hard.
Identifying escaped characters in qualified strings is very tricky.
Splitting a qualified string while taking escape characters into account is really difficult!

Unit Tests

// Each row pairs an input string with the segments expected when it is split on
// spaces, using the double quote as qualifier and the default options and escape.
[Theory]
[InlineData(null, new string[0])]
[InlineData("", new string[0])]
[InlineData("hello world", new[] { "hello", "world" })]
[InlineData("hello world", new[] { "hello", "world" })]
[InlineData("\"hello world\"", new[] { "\"hello world\"" })]
[InlineData("\"hello world\"", new[] { "\"hello world\"" })]
[InlineData("hello \"goodnight moon\" world", new[]
{
    "hello",
    "\"goodnight moon\"",
    "world",
})]
[InlineData("hello \"goodnight \\\" moon\" world", new[]
{
    "hello",
    "\"goodnight \\\" moon\"",
    "world",
})]
[InlineData("hello \"goodnight \\\\\" moon\" world", new[]
{
    "hello",
    "\"goodnight \\\\\"",
    "moon\"",
    "world",
})]
public void SplitQualified(string input, IList<string> expected)
{
    // Materialize the lazy result once so it can be counted and walked.
    var segments = input.SplitQualified(' ', '"');
    var actual = new List<string>(segments);

    // Check the segment count first, then each segment in order.
    Assert.Equal(expected.Count, actual.Count);

    var position = 0;
    foreach (var segment in actual)
    {
        Assert.Equal(expected[position], segment);
        position++;
    }
}

String Extension Methods

/// <summary>
/// Splits <paramref name="input"/> on <paramref name="separator"/>, leaving separators
/// that sit between a pair of <paramref name="qualifier"/> characters untouched.
/// </summary>
/// <param name="input">The string to split; may be null.</param>
/// <param name="separator">The character to split on.</param>
/// <param name="qualifier">The character that opens and closes a qualified section.</param>
/// <param name="options">
/// <see cref="StringSplitOptions.None"/> returns every segment; any other value
/// (the default is <see cref="StringSplitOptions.RemoveEmptyEntries"/>) drops segments
/// that are null, empty or whitespace-only.
/// </param>
/// <param name="escape">The character that escapes a qualifier; a backslash by default.</param>
/// <returns>
/// The segments in order of appearance, or an empty sequence when
/// <paramref name="input"/> is null, empty or whitespace-only.
/// </returns>
public static IEnumerable<string> SplitQualified(
    this string input,
    char separator,
    char qualifier,
    StringSplitOptions options = StringSplitOptions.RemoveEmptyEntries,
    char escape = '\\')
{
    // Nothing to split: null, empty and whitespace-only input all produce no segments.
    if (String.IsNullOrWhiteSpace(input))
    {
        return new string[0];
    }

    var segments = SplitQualified(input, separator, qualifier, escape);
    if (options == StringSplitOptions.None)
    {
        return segments;
    }

    return segments.Where(segment => !String.IsNullOrWhiteSpace(segment));
}
/// <summary>
/// Splits <paramref name="input"/> on every <paramref name="separator"/> that does not
/// lie between a pair of unescaped <paramref name="qualifier"/> characters.
/// </summary>
/// <remarks>
/// Qualifier and escape characters are left in the returned segments, and empty
/// segments are returned as-is. Unescaped qualifiers are paired in order of
/// appearance (first with second, third with fourth, ...); a trailing qualifier
/// without a partner does not protect the separators that follow it.
/// </remarks>
/// <param name="input">The string to split; must not be null.</param>
/// <param name="separator">The character to split on.</param>
/// <param name="qualifier">The character that opens and closes a qualified section.</param>
/// <param name="escape">The character that escapes a qualifier (or another escape).</param>
/// <returns>The segments between the effective separators, in order.</returns>
private static IEnumerable<string> SplitQualified(
    string input,
    char separator,
    char qualifier,
    char escape)
{
    var separatorIndexes = input
        .IndexesOf(separator)
        .ToList();
    var qualifierIndexes = input
        .IndexesOf(qualifier)
        .ToList();

    // Remove Escaped Qualifiers
    // A qualifier is escaped when it is preceded by an odd number of consecutive
    // escape characters; an even run means the escapes escape each other. The run
    // is counted all the way back to index 0 so qualifiers near the start of the
    // string are classified correctly. Iterating backwards keeps indexes stable
    // while removing.
    for (var i = qualifierIndexes.Count - 1; i >= 0; i--)
    {
        var escapeCount = 0;
        for (var j = qualifierIndexes[i] - 1; j >= 0 && input[j] == escape; j--)
            escapeCount++;
        if (escapeCount % 2 == 1)
            qualifierIndexes.RemoveAt(i);
    }

    // Remove Qualified Separators
    // A separator strictly between the two qualifiers of a pair is literal text.
    for (var i = separatorIndexes.Count - 1; i >= 0; i--)
    {
        var separatorIndex = separatorIndexes[i];
        for (var j = 0; j + 1 < qualifierIndexes.Count; j += 2)
        {
            if (separatorIndex > qualifierIndexes[j]
                && separatorIndex < qualifierIndexes[j + 1])
            {
                separatorIndexes.RemoveAt(i);
                break;
            }
        }
    }

    // Split String On Separators
    // Track where the next segment starts rather than the previous separator, so a
    // separator at index 0 or at the very end needs no special casing.
    var segmentStart = 0;
    foreach (var separatorIndex in separatorIndexes)
    {
        yield return input.Substring(segmentStart, separatorIndex - segmentStart);
        segmentStart = separatorIndex + 1;
    }
    // Final segment; empty when the input ends with a separator.
    yield return input.Substring(segmentStart);
}
/// <summary>
/// Lazily yields the zero-based index of every occurrence of
/// <paramref name="value"/> in <paramref name="input"/>, in ascending order.
/// </summary>
/// <param name="input">The string to search; null or empty yields nothing.</param>
/// <param name="value">The character to look for.</param>
/// <returns>The indexes at which <paramref name="value"/> occurs.</returns>
public static IEnumerable<int> IndexesOf(
    this string input,
    char value)
{
    // Only null/empty input is skipped: a whitespace-only string still contains
    // characters, and the caller may be searching for a whitespace character.
    if (String.IsNullOrEmpty(input))
        yield break;

    // Resume each search one past the previous hit; IndexOf accepts a start
    // index equal to the string length and returns -1 in that case.
    for (var index = input.IndexOf(value);
         index > -1;
         index = input.IndexOf(value, index + 1))
    {
        yield return index;
    }
}
Shout it

Enjoy,
Tom

1 comment:

  1. Nice.

    I especially enjoy the reference to Goodnight Moon.

    Reply Delete

Subscribe to: Post Comments (Atom)

AltStyle によって変換されたページ (->オリジナル) /