Return to Revisions

2 of 4

Indentation got messed up

edited Aug 2, 2018 at 16:15

Here is a much longer implementation that handles scientific notation, NaN, Infinity, Negative Infinity, and leading positive signs. I also added a lot of comments to help visually break it into chunks.

It manages to be almost as fast as the previous method - most of the logic takes place in the body of previous if checks that simply returned false before.

I found a few places where I could avoid repeated checks for non-digit characters, and use the first digit character to initialize the result directly to avoid unnecessary additions/multiplcations with zero on the first iteration of the loop.

public static bool FastTryParseDouble(string input, out double result)
{
 int length = input.Length;
 if (length <= 0)
 {
 result = Double.NaN;
 return false;
 }
 double sign = 1d;
 int currentIndex = 0;
 char nextChar = input[0];
 /**************** Sign (+/-) and Special Case String Representations *****************/
 // Handle all cases where the string does not start with a numeric character
 if (nextChar < '0' || nextChar > '9')
 {
 // Non-numeric 1-character strings must match one of the supported special cases.
 if (length == 1)
 return CheckForSpecialCaseDoubleStrings(input, out result);
 // For anything more than one character, this should be a sign character.
 if (nextChar == CharNegative)
 sign = -1d;
 // The very next character may also be the decimal separator.
 else if (nextChar == CharDecimalSeparator)
 {
 // In this case, we treat the integer part as 0 and skip to the fractional part.
 result = 0d;
 goto SkipIntegerPart;
 }
 // Finally, unless it was a '+' sign, input must match one of a set of special cases.
 else if (nextChar != CharPositive)
 return CheckForSpecialCaseDoubleStrings(input, out result);
 // Once the sign is consumed, advance to the next character for further parsing
 nextChar = input[unchecked(++currentIndex)];
 // We must once more check whether the character is numeric before proceeding.
 if (nextChar < '0' || nextChar > '9')
 {
 // If not numeric, at this point, the character can only be a decimal separator
 // (as in "-.123" or "+.123"), or else it must be part of a special case string
 // (as in "-∞"). So check for those.
 if (nextChar != CharDecimalSeparator)
 return CheckForSpecialCaseDoubleStrings(input, out result);
 result = 0d;
 goto SkipIntegerPart;
 }
 }
 /********************************** "Integer Part" ***********************************/
 // Treat all subsequent numeric characters as the "integer part" of the result.
 unchecked
 {
 // Since we've already checked that the next character is numeric,
 // We can save 2 ops by initializing the result directly.
 result = nextChar - '0';
 while(true)
 {
 // Return the "integer part" immediately if we encounter the end of the string
 if (++currentIndex >= length)
 {
 result *= sign;
 return true;
 }
 nextChar = input[currentIndex];
 if (nextChar < '0' || nextChar > '9') break;
 result = result * 10d + (nextChar - '0');
 }
 }
 // This label and corresponding goto statements is a performance optimization to
 // allow us to efficiently skip "integer part" parsing in cases like ".123456"
 // Please don't be mad.
 SkipIntegerPart: 
 
 // The expected case is that the next character is a decimal separator.
 if (nextChar != CharDecimalSeparator)
 {
 // If not this is only allowed to be the exponent character
 if (nextChar != 'e' && nextChar != 'E') return false;
 // Skip fractional parsing and jump to handling exponential notation.
 result = result * sign;
 }
 else
 {
 /******************************* "Fractional Part" *******************************/
 double fractionalPart = 0d;
 int fractionStart = ++currentIndex;
 // Perform a similar loop to construct the fractional part of the decimal
 unchecked
 {
 do
 {
 nextChar = input[currentIndex];
 if (nextChar < '0' || nextChar > '9') break;
 fractionalPart = fractionalPart * 10d + (nextChar - '0');
 } while (++currentIndex < length);
 // Adjust the magnitude of the fractional part and add it to the result
 int fractionLength = currentIndex - fractionStart;
 // Use our tiny array of negative powers of 10 if possible.
 if (fractionLength < NegPow10Length)
 result = sign * (result + fractionalPart * NegPow10[fractionLength]);
 // Fallback to our larger array (still fast), whose higher indices store negative powers
 else if (fractionLength < MaxDoubleExponent)
 result = sign * (result + fractionalPart * Pow10[MaxDoubleExponent + fractionLength]);
 // This would only happen for ridiculous strings (longer than 300 characters),
 // but technically we can still fall-back to using native Math.Pow to do the shift.
 else
 result = sign * (result + fractionalPart * Math.Pow(10, -fractionLength));
 }
 // If we've consumed every character, return now
 if (currentIndex >= length) return true;
 // If there are any more characters, the next character must be 'E' or 'e'
 if (nextChar != 'e' && nextChar != 'E') return false;
 }
 /**************************** "Scientific Notation Part" *****************************/
 // If we're at the end of the string (last character was 'e' or 'E'), that's an error
 if (unchecked(++currentIndex) >= length) return false;
 // Otherwise, advance the current character and begin parsing the exponent
 nextChar = input[currentIndex];
 bool exponentIsNegative = false;
 // The next character can only be a +/- sign, or a numeric character
 if (nextChar < '0' || nextChar > '9')
 {
 if (nextChar == CharNegative)
 exponentIsNegative = true;
 else if (nextChar != CharPositive)
 return false;
 // Again, require there to be at least one more character in the string after the sign
 if (unchecked(++currentIndex) >= length) return false;
 nextChar = input[currentIndex];
 // And verify that this next character is numeric
 if (nextChar < '0' || nextChar > '9') return false;
 }
 unchecked
 {
 // Since we know the next character is a digit, we can initialize the exponent int
 // directly and avoid 2 wasted ops (multiplying by and adding to zero).
 int exponent = nextChar - '0';
 // Shift and add any additional digit characters
 while (++currentIndex < length)
 {
 nextChar = input[currentIndex];
 // If we encounter any non-numeric characters now, it's definitely an error
 if (nextChar < '0' || nextChar > '9') return false;
 exponent = exponent * 10 + (nextChar - '0');
 }
 // Convert the exponent into index we can jump to in our powers-of-ten array
 if (exponentIsNegative)
 exponent += MaxDoubleExponent;
 else if (exponent > MaxDoubleExponent)
 return false;
 // Use our array of powers, unless the exponent is a very large negative number,
 // in which case we fall back to computing the power on-the-fly.
 result *= exponent < Pow10Length ? Pow10[exponent] :
 Math.Pow(10, MaxDoubleExponent - exponent);
 }
 return true;
}
/// <summary>Checks if the string matches one of a few supported special case
/// double strings. If so, assigns the result and returns true.</summary>
public static bool CheckForSpecialCaseDoubleStrings(string input, out double result)
{
 if (input == NumberFormat.PositiveInfinitySymbol)
 result = Double.PositiveInfinity;
 else if (input == NumberFormat.NegativeInfinitySymbol)
 result = Double.NegativeInfinity;
 else if (input == NumberFormat.NaNSymbol)
 result = Double.NaN;
 // Special Case: Excel has been known to format zero as "-".
 // We intentionally support it by returning zero now (most parsers would not)
 else if (input == NumberFormat.NegativeSign)
 result = 0d;
 // Special Case: Our organization treats the term "Unlimited" as referring
 // to Double.MaxValue (most parsers would not)
 else if (input.Equals("unlimited", StringComparison.OrdinalIgnoreCase))
 result = Double.MaxValue;
 // Anything else is not a valid input
 else
 {
 result = Double.NaN;
 return false;
 }
 return true;
}
/// <summary>The largest exponent (or smallest when negative) that can be given to a Double.</summary>
private const int MaxDoubleExponent = 308;
/// <summary>The number of elements that will be generated in the Pow10 array.</summary>
private const int Pow10Length = MaxDoubleExponent * 2 + 1;
/// <summary>A cache of all possible positive powers of 10 that might be required to
/// apply an exponent to a double (Indices 0-308), as well as the first 308 negative
/// exponents. (Indices 309-616)</summary>
private static readonly double[] Pow10 =
 Enumerable.Range(0, MaxDoubleExponent + 1).Select(i => Math.Pow(10, i))
 .Concat(Enumerable.Range(1, MaxDoubleExponent).Select(i => Math.Pow(10, -i)))
 .ToArray();
/// <summary>The number of negative powers to pre-compute and store in a small array.</summary>
private const int NegPow10Length = 16;
/// <summary>A cache of the first 15 negative powers of 10 for quick
/// magnitude adjustment of common parsed fractional parts of doubles.</summary>
/// <remarks>Even though this overlaps with the Pow10 array, it is kept separate so that
/// users that don't use scientific notation or extremely long fractional parts
/// might get a speedup by being able to reference the smaller array, which has a better
/// chance of being served out of L1/L2 cache.</remarks>
private static readonly double[] NegPow10 =
 Enumerable.Range(0, NegPow10Length).Select(i => Math.Pow(10, -i)).ToArray();

This new method matches all of the following test cases:

// Numbers without a fractional part
TestSuccess("0", 0d);
TestSuccess("1", 1d);
TestSuccess("-1", -1d);
TestSuccess("12345678901234", 12345678901234d);
TestSuccess("-12345678901234", -12345678901234d);
// Numbers with a fractional part
TestSuccess("123.45678", 123.45678);
TestSuccess("-123.45678", -123.45678);
// Numbers without an integer part
TestSuccess(".12345678901234", 0.12345678901234);
TestSuccess("-.12345678901234", -0.12345678901234);
// Various high-precision numbers
TestSuccess("0.12345678901234", 0.12345678901234);
TestSuccess("-0.12345678901234", -0.12345678901234);
TestSuccess("0.00000987654321", 0.00000987654321);
TestSuccess("-0.00000987654321", -0.00000987654321);
TestSuccess("1234567890123.0123456789", 1234567890123.0123456789);
TestSuccess("-1234567890123.0123456789", -1234567890123.0123456789);
// Numbers with very long fractional parts (more than 16 characters)
TestSuccess("0.00826499999979784", 0.00826499999979784);
TestSuccess("-0.00826499999979784", -0.00826499999979784);
TestSuccess("1.0123456789012345678901234567890", 1.0123456789012345678901234567890);
TestSuccess("-1.0123456789012345678901234567890", -1.0123456789012345678901234567890);
// Numbers with a leading positive sign
TestSuccess("+1", 1d);
TestSuccess("+12345678901234", 12345678901234d);
TestSuccess("+.12345678901234", 0.12345678901234);
TestSuccess("+0.00826499999979784", 0.00826499999979784);
// Very large numbers without scientific notation
TestSuccess("123456789000000000000000", 123456789000000000000000d);
TestSuccess("-123456789000000000000000", -123456789000000000000000d);
// Very small numbers without scientific notation
TestSuccess("0.00000000000000000123456789", 0.00000000000000000123456789);
TestSuccess("-0.00000000000000000123456789", -0.00000000000000000123456789);
// Scientific notation without a sign
TestSuccess("1.2345678e5", 1.2345678e5);
TestSuccess("1.2345678e5", 1.2345678e5);
TestSuccess("-1.2345678e5", -1.2345678e5);
// Scientific notation with a sign
TestSuccess("1.2345678e+25", 1.2345678e25);
TestSuccess("-1.2345678e+25", -1.2345678e25);
TestSuccess("1.2345678e-255", 1.2345678e-255);
TestSuccess("-1.2345678e-255", -1.2345678e-255);
// Boundary cases
TestSuccess("1e0", 1);
TestSuccess("1e1", 10);
TestSuccess("1e-1", 0.1);
TestSuccess("1e308", 1e308);
// Min and Max Double
TestSuccess("1.7976931348623157E+308", 1.7976931348623157E+308);
TestSuccess("-1.7976931348623157E+308", -1.7976931348623157E+308);
// Large Negative Exponents (Near-epsilon) doubles.
TestSuccess("1.23E-999", 1.23E-999);
TestSuccess("-1.23E-999", -1.23E-999);
// Special keywords
TestSuccess("∞", Double.PositiveInfinity);
TestSuccess("-∞", Double.NegativeInfinity);
TestSuccess("NaN", Double.NaN);
// Special case: "Unlimited" is used in our organization to refer to Double.MaxValue
TestSuccess("Unlimited", Double.MaxValue);
// Special case: "-" character only means zero in accounting formats.
TestSuccess("-", 0d);

Benchmark Results

Using a Stopwatch this time, just to quell any debate:

Native parser took 4583 ms.

Custom parser took 1162 ms.

Performance gain was 294.41%

answered Aug 2, 2018 at 16:08

Alain

default