Return to Question

I'm trying to outperform the native Double.TryParse by as much as possible when parsing large, simple CSV files with millions of rows. I do not have to support exponential notation, thousands separators, Inf, -Inf, NaN, or anything exotic. Just millions of doubles in "0.00##" format.

TestSuccess("0", 0d);
TestSuccess("1", 1d);
TestSuccess("-1", -1d);
TestSuccess("123.45678"45", 123.4567845);
TestSuccess("-123.45678"45", -123.4567845);
TestSuccess("12345678901234", 12345678901234d);
TestSuccess("-12345678901234", -12345678901234d);
TestSuccess("0.12345678901234"12", 0.1234567890123412);
TestSuccess("-0.12345678901234", -0.12345678901234);
TestSuccess(".12345678901234", 0.12345678901234);
TestSuccess("-.12345678901234"12", -0.1234567890123412);
TestSuccess("0.00000987654321"00", 0.0000098765432100);
TestSuccess("-0.00000987654321"00", -0.0000098765432100);
TestSuccess("1234567890123.0123456789"01", 1234567890123.012345678901);
TestSuccess("-1234567890123.0123456789"01", -1234567890123.012345678901);
TestSuccess("123456789000000000000000", 123456789000000000000000d);
TestSuccess("-123456789000000000000000", -123456789000000000000000d);
TestSuccess("0.00000000000000000123456789", 0.00000000000000000123456789);
TestSuccess("-0.00000000000000000123456789", -0.00000000000000000123456789);
// Special case, an empty dash is interpreted as negative zero (natively not parsable)
TestSuccess("-", -0d);

TestSuccess("0", 0d);
TestSuccess("1", 1d);
TestSuccess("-1", -1d);
TestSuccess("123.45678", 123.45678);
TestSuccess("-123.45678", -123.45678);
TestSuccess("12345678901234", 12345678901234d);
TestSuccess("-12345678901234", -12345678901234d);
TestSuccess("0.12345678901234", 0.12345678901234);
TestSuccess("-0.12345678901234", -0.12345678901234);
TestSuccess(".12345678901234", 0.12345678901234);
TestSuccess("-.12345678901234", -0.12345678901234);
TestSuccess("0.00000987654321", 0.00000987654321);
TestSuccess("-0.00000987654321", -0.00000987654321);
TestSuccess("1234567890123.0123456789", 1234567890123.0123456789);
TestSuccess("-1234567890123.0123456789", -1234567890123.0123456789);
TestSuccess("123456789000000000000000", 123456789000000000000000d);
TestSuccess("-123456789000000000000000", -123456789000000000000000d);
TestSuccess("0.00000000000000000123456789", 0.00000000000000000123456789);
TestSuccess("-0.00000000000000000123456789", -0.00000000000000000123456789);
// Special case, an empty dash is interpreted as negative zero (natively not parsable)
TestSuccess("-", -0d);

TestSuccess("0", 0d);
TestSuccess("1", 1d);
TestSuccess("-1", -1d);
TestSuccess("123.45", 123.45);
TestSuccess("-123.45", -123.45);
TestSuccess("12345678901234", 12345678901234d);
TestSuccess("-12345678901234", -12345678901234d);
TestSuccess("0.12", 0.12);
TestSuccess("-0.12", -0.12);
TestSuccess("0.00", 0.00);
TestSuccess("-0.00", -0.00);
TestSuccess("1234567890123.01", 1234567890123.01);
TestSuccess("-1234567890123.01", -1234567890123.01);
TestSuccess("123456789000000000000000", 123456789000000000000000d);
TestSuccess("-123456789000000000000000", -123456789000000000000000d);

Slight tweak seems to help

Source Link

edited Aug 1, 2018 at 0:27

Alain

edited Aug 1, 2018 at 0:27

Alain

 unchecked
 {
 while (true)
 {
 // Return now if we have reached the end of the string
 if (currentIndex >= length)
 {
 result *= sign;
 return true;
 }
 nextChar = input[currentIndex++];
 // Break if the result wasn't a digit between 0 and 9
 if (nextChar < '0' || nextChar > '9') break;
 // Multiply by 10 and add the next digit.
 result = result * 10 + (nextChar - '0');
 }
 // The next character should be a decimal character, or else it's invalid.
 if (nextChar != CharDecimalSeparator) return false;
 double fractionalPart = 0d;
 int fractionLengh = length - currentIndex;
 while (currentIndex < length)
 {
 nextChar = input[currentIndex++];
 // If we encounter a non-digit now, it's an error
 if (nextChar < '0' || nextChar > '9') return false;
 fractionalPart = fractionalPart * 10 + (nextChar - '0');
 }
 // Adjust the magnitude ofAdd the fractional part and add to the result, apply sign, and return
 result += fractionalPart * if (fractionLengh < NegPow10.Length ?)
 NegPow10[fractionLengh]result := Math.Pow(10,result -fractionLengh)+ fractionalPart * NegPow10[fractionLengh]);
 * sign;
 // Apply theelse
 sign (1 or -1) before returning.
 result = (result *=+ fractionalPart * Math.Pow(10, -fractionLengh)) * sign;
 }
 return true;
}

Native Double.TryParse took ~4500 ms (previously ~4400 ms).

Custom Parsers.FastTryParseDouble took ~950 ms (previously ~1200 ms).

Performance gain was ~370% (previously ~260%).

 unchecked
 {
 while (true)
 {
 // Return now if we have reached the end of the string
 if (currentIndex >= length)
 {
 result *= sign;
 return true;
 }
 nextChar = input[currentIndex++];
 // Break if the result wasn't a digit between 0 and 9
 if (nextChar < '0' || nextChar > '9') break;
 // Multiply by 10 and add the next digit.
 result = result * 10 + (nextChar - '0');
 }
 // The next character should be a decimal character, or else it's invalid.
 if (nextChar != CharDecimalSeparator) return false;
 double fractionalPart = 0d;
 int fractionLengh = length - currentIndex;
 while (currentIndex < length)
 {
 nextChar = input[currentIndex++];
 // If we encounter a non-digit now, it's an error
 if (nextChar < '0' || nextChar > '9') return false;
 fractionalPart = fractionalPart * 10 + (nextChar - '0');
 }
 // Adjust the magnitude of the fractional part and add to the result
 result += fractionalPart * (fractionLengh < NegPow10.Length ?
 NegPow10[fractionLengh] : Math.Pow(10, -fractionLengh));
 // Apply the sign (1 or -1) before returning.
 result *= sign;
 }
 return true;
}

Native Double.TryParse took ~4400 ms.

Custom Parsers.FastTryParseDouble took ~1200 ms.

Performance gain was ~260%

 unchecked
 {
 while (true)
 {
 // Return now if we have reached the end of the string
 if (currentIndex >= length)
 {
 result *= sign;
 return true;
 }
 nextChar = input[currentIndex++];
 // Break if the result wasn't a digit between 0 and 9
 if (nextChar < '0' || nextChar > '9') break;
 // Multiply by 10 and add the next digit.
 result = result * 10 + (nextChar - '0');
 }
 // The next character should be a decimal character, or else it's invalid.
 if (nextChar != CharDecimalSeparator) return false;
 double fractionalPart = 0d;
 int fractionLengh = length - currentIndex;
 while (currentIndex < length)
 {
 nextChar = input[currentIndex++];
 // If we encounter a non-digit now, it's an error
 if (nextChar < '0' || nextChar > '9') return false;
 fractionalPart = fractionalPart * 10 + (nextChar - '0');
 }
 // Add the fractional part to the result, apply sign, and return
 if (fractionLengh < NegPow10.Length)
 result = (result + fractionalPart * NegPow10[fractionLengh]) * sign;
 else
 result = (result + fractionalPart * Math.Pow(10, -fractionLengh)) * sign;
 }
 return true;
}