Although @AdrianoRepetti's answer is addressing some very important points, the suggested code
string[] characters = StringInfo.GetTextElementEnumerator(value).ToArray();
simply doesn't exist. Nevertheless, his answer gave me a head start for an answer to my question.
So I dug a little bit into the StringInfo
class and discovered that there lives a very handy method, namely the SubstringByTextElements()
method which seems to serve my purpose well.
In addition the LengthInTextElements
property needs to replace the calls to the string.Length
property to make the whole thing work.
By reading @Dmitri's answer I realized that he is right about
You know an exact number of elements in the output, so it should be just an array string[].
Unfortunately I can't agree about the naming: precisely because the string class already has a Split()
method, I want to provide this one as an additional overload.
So after applying the mentioned points, the methods now look like this:
/// <summary>
/// Splits <paramref name="value"/> into consecutive chunks of
/// <paramref name="desiredLength"/> text elements each, measuring and slicing with
/// <see cref="StringInfo"/> instead of <see cref="string.Length"/>.
/// </summary>
/// <param name="value">The string to split.</param>
/// <param name="desiredLength">The number of text elements per chunk.</param>
/// <param name="strict">Forwarded to <c>EnsureValid</c> together with the other arguments.</param>
/// <returns>
/// The chunks in their original order. When the text-element count is not a multiple of
/// <paramref name="desiredLength"/>, the last chunk holds the shorter rest. An empty
/// array is returned when <paramref name="value"/> contains no text elements.
/// </returns>
public static string[] Split(this string value, int desiredLength, bool strict = false)
{
    // All argument checking is delegated; it throws before any splitting happens.
    EnsureValid(value, desiredLength, strict);

    var textElements = new StringInfo(value);
    int totalElements = textElements.LengthInTextElements;
    if (totalElements == 0)
    {
        return new string[0];
    }

    int fullChunks = totalElements / desiredLength;
    bool hasTail = totalElements > fullChunks * desiredLength;
    var result = new string[hasTail ? fullChunks + 1 : fullChunks];

    // offset is the index of the first text element of the chunk being cut.
    int offset = 0;
    for (int index = 0; index < fullChunks; index++, offset += desiredLength)
    {
        result[index] = textElements.SubstringByTextElements(offset, desiredLength);
    }

    if (hasTail)
    {
        // Everything after the last full chunk becomes one shorter chunk.
        result[fullChunks] = textElements.SubstringByTextElements(offset);
    }

    return result;
}
/// <summary>
/// Validates the arguments for splitting a string into chunks of
/// <paramref name="desiredLength"/> text elements.
/// </summary>
/// <param name="value">The string that is about to be split.</param>
/// <param name="desiredLength">The requested number of text elements per chunk.</param>
/// <param name="strict">
/// When <c>true</c>, the number of text elements in <paramref name="value"/> must be an
/// exact multiple of <paramref name="desiredLength"/>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="value"/> is empty while <paramref name="desiredLength"/> is not 0,
/// or <paramref name="value"/> is not empty and <paramref name="desiredLength"/> is less than 1,
/// or <paramref name="strict"/> is <c>true</c> and the text-element count is not a
/// multiple of <paramref name="desiredLength"/>.
/// </exception>
private static void EnsureValid(string value, int desiredLength, bool strict)
{
    if (value == null) { throw new ArgumentNullException(nameof(value)); }

    if (value.Length == 0)
    {
        if (desiredLength != 0)
        {
            throw new ArgumentException($"The passed {nameof(value)} may not be empty if the {nameof(desiredLength)} <> 0");
        }

        // An empty value combined with a desiredLength of 0 is accepted. Returning here
        // keeps the strict check below from evaluating 0 % 0, which would throw
        // DivideByZeroException instead of accepting the input.
        return;
    }

    // A non-empty string always holds at least one text element, so the chunk size
    // has to be positive from here on.
    if (desiredLength < 1) { throw new ArgumentException($"The value of {nameof(desiredLength)} needs to be > 0"); }

    if (strict)
    {
        // Count text elements rather than UTF-16 code units so that surrogate pairs and
        // combining sequences are measured the same way they are split.
        int valueLength = new StringInfo(value).LengthInTextElements;
        if (valueLength % desiredLength != 0)
        {
            throw new ArgumentException($"The passed {nameof(value)}'s length can't be split by the {nameof(desiredLength)}");
        }
    }
}
which pass not only the former tests, but also these new tests:
// Chunks of two over a string made of characters outside the Basic Multilingual Plane
// (each one a surrogate pair in UTF-16) with a single ASCII letter in the middle.
[TestMethod]
public void SplitTestUnicodeVariant()
{
    // Arrange
    const string text = "𠀑𠀑𠀑a𠀑𠀑𠀑";
    var expectedChunks = new[] { "𠀑𠀑", "𠀑a", "𠀑𠀑", "𠀑" };

    // Act
    var chunks = text.Split(2);

    // Assert
    CollectionAssert.AreEqual(expectedChunks, chunks);
}
// Chunks of three over a mix of ASCII, accented and non-BMP characters; the last
// chunk is expected to hold the shorter rest.
[TestMethod]
public void SplitTestUnicodeVariant1()
{
    // Arrange
    const string text = "dž𠀑𠀑a𠀑é𠀑";
    var expectedChunks = new[] { "dž𠀑", "𠀑a𠀑", "é𠀑" };

    // Act
    var chunks = text.Split(3);

    // Assert
    CollectionAssert.AreEqual(expectedChunks, chunks);
}
// "e\u0301" is an 'e' followed by U+0301 (combining acute accent); the expected chunks
// keep that pair together as a single element.
[TestMethod]
public void SplitTestUnicodeVariant2()
{
    // Arrange
    const string text = "éée\u0301éé";
    var expectedChunks = new[] { "éé", "e\u0301é", "é" };

    // Act
    var chunks = text.Split(2);

    // Assert
    CollectionAssert.AreEqual(expectedChunks, chunks);
}
- 50.9k
- 5
- 83
- 177