In this comment the OP wrote,
I am a newbie so i would like to know how would i parse the negetive numbers/arguments ?
In this answer @200_success showed an implementation using the strtod function.
The following is my implementation of the strtod function:
A valid floating point number for strtod using the "C" locale is formed by an optional sign character (+ or -), followed by a sequence of digits, optionally containing a decimal-point character (.), optionally followed by an exponent part (an e or E character followed by an optional sign and a sequence of digits).
I'm not using a modern compiler so I didn't implement this portion of the spec.:
If the correct value is out of the range of representable values for the type, a positive or negative HUGE_VAL is returned, and errno is set to ERANGE.
If the correct value would cause underflow, the function returns a value whose magnitude is no greater than the smallest normalized positive number and sets errno to ERANGE.
- Is my implementation correct (does it return correct output for all input)?
- Is it easy to read even without comments?
- Is the set of test cases sufficiently complete?
- Any other suggestions for improvement?1
1(except comments on where I put { }
braces, and whether I use them around single-line control statements
double strtod(const char* str, char** endptr)
{
double result = 0.0;
char signedResult = '0円';
char signedExponent = '0円';
int decimals = 0;
bool isExponent = false;
bool hasExponent = false;
bool hasResult = false;
// exponent is logically int but is coded as double so that its eventual
// overflow detection can be the same as for double result
double exponent = 0;
char c;
for (; '0円' != (c = *str); ++str)
{
if ((c >= '0') && (c <= '9'))
{
int digit = c - '0';
if (isExponent)
{
exponent = (10 * exponent) + digit;
hasExponent = true;
}
else if (decimals == 0)
{
result = (10 * result) + digit;
hasResult = true;
}
else
{
result += (double)digit / decimals;
decimals *= 10;
}
continue;
}
if (c == '.')
{
if (!hasResult)
{
// don't allow leading '.'
break;
}
if (isExponent)
{
// don't allow decimal places in exponent
break;
}
if (decimals != 0)
{
// this is the 2nd time we've found a '.'
break;
}
decimals = 10;
continue;
}
if ((c == '-') || (c == '+'))
{
if (isExponent)
{
if (signedExponent || (exponent != 0))
break;
else
signedExponent = c;
}
else
{
if (signedResult || (result != 0))
break;
else
signedResult = c;
}
continue;
}
if (c == 'E')
{
if (!hasResult)
{
// don't allow leading 'E'
break;
}
if (isExponent)
break;
else
isExponent = true;
continue;
}
// else unexpected character
break;
}
if (isExponent && !hasExponent)
{
while (*str != 'E')
--str;
}
if (!hasResult && signedResult)
--str;
if (endptr)
*endptr = const_cast<char*>(str);
for (; exponent != 0; --exponent)
{
if (signedExponent == '-')
result /= 10;
else
result *= 10;
}
if (signedResult == '-')
{
if (result != 0)
result = -result;
// else I'm not used to working with double-precision numbers so I
// was surprised to find my assert for "-0" failing, saying -0 != +0.
}
return result;
}
// This header is only needed for assert, not for strtod implementation
#include <cstring>
void assert(const char* s, double d, const char* remainder)
{
char* endptr;
double result = strtod(s, &endptr);
if ((result!=d) || strcmp(endptr, remainder))
throw "failed";
}
int main()
{
assert("0", 0, "");
assert("-0", 0, "");
assert("12", 12, "");
assert("23.5", 23.5, "");
assert("-14", -14, "");
assert("-", 0, "-");
assert("-2-a", -2, "-a");
assert("-2a", -2, "a");
assert("0.036", 0.036, "");
assert("12.5E2", 12.5E2, "");
assert("12.5E-3", 12.5E-3, "");
assert("12.5E0", 12.5E0, "");
assert("12.5E", 12.5, "E");
assert("12.5E-", 12.5, "E-");
assert("", 0, "");
assert("a", 0, "a");
assert("E10", 0, "E10");
assert("-E10", 0, "-E10");
assert("-0E10", 0, "");
assert(".3", 0, ".3");
assert("-.3", 0, "-.3");
strtod("42C", 0); // tests endptr == null
assert("+12", 12, "");
assert("+-12", 0, "+-12");
assert("12.5E+3", 12.5E+3, "");
assert("12.5E+-3", 12.5, "E+-3");
}
3 Answers 3
One of the biggest obstacles to comprehension in my mind is the inconsistent bracing. As long as you use the same style everywhere, it doesn't matter that much, but please do use the same thing everywhere.
With all the bracing and stuff modified to be consistant, the code is much more readable, if not completely so:
double strtod(const char* str, char** endptr){
double result = 0.0;
char signedResult = '0円';
char signedExponent = '0円';
int decimals = 0;
bool isExponent = false;
bool hasExponent = false;
bool hasResult = false;
// exponent is logically int but is coded as double so that its eventual
// overflow detection can be the same as for double result
double exponent = 0;
char c;
for (; '0円' != (c = *str); ++str) {
if ((c >= '0') && (c <= '9')) {
int digit = c - '0';
if (isExponent) {
exponent = (10 * exponent) + digit;
hasExponent = true;
} else if (decimals == 0) {
result = (10 * result) + digit;
hasResult = true;
} else {
result += (double)digit / decimals;
decimals *= 10;
}
continue;
}
if (c == '.') {
if (!hasResult) break; // don't allow leading '.'
if (isExponent) break; // don't allow decimal places in exponent
if (decimals != 0) break; // this is the 2nd time we've found a '.'
decimals = 10;
continue;
}
if ((c == '-') || (c == '+')) {
if (isExponent) {
if (signedExponent || (exponent != 0)) break;
else signedExponent = c;
} else {
if (signedResult || (result != 0)) break;
else signedResult = c;
}
continue;
}
if (c == 'E') {
if (!hasResult) break; // don't allow leading 'E'
if (isExponent) break;
else isExponent = true;
continue;
}
break; // unexpected character
}
if (isExponent && !hasExponent) {
while (*str != 'E')
--str;
}
if (!hasResult && signedResult) --str;
if (endptr) *endptr = const_cast<char*>(str);
for (; exponent != 0; --exponent) {
if (signedExponent == '-') result /= 10;
else result *= 10;
}
if (signedResult == '-' && result != 0) result = -result;
return result;
}
As far as correctness goes, there is only one flaw I have spotted, but there are structural problems that you might want to fix. (The error I found is that you only allow a capital "E" to signify the exponent, while the standard allows use of either a capital "E" or lowercase "e".)
Structurally, you should consider refactoring out different parts of the function. You should, for instance refactor out a function processing a string of digits into an integral type into a separate method. And you should see if you can separate the big for loop into different loops for processing the different parts of the string.
-
\$\begingroup\$ Having this all one one line
if (!hasResult) break; // don't allow leading '.'
is IMO compact and readable; but, it's so bold! Because it contradicts various style guides like this one. \$\endgroup\$ChrisW– ChrisW2014年03月20日 15:59:29 +00:00Commented Mar 20, 2014 at 15:59 -
\$\begingroup\$ @ChrisW And yet there are many other style guides that recommend single statements in braceless if statements be placed on the same line. The important thing though is consistency and readability, not following any particular style guide. \$\endgroup\$AJMansfield– AJMansfield2014年03月20日 16:04:01 +00:00Commented Mar 20, 2014 at 16:04
-
\$\begingroup\$ To "refactor out a function processing a string of digits" I get I would need,
int strtoi(const char* str, char** endptr)
. And if it returns an int, then the caller needs to convert e.g. "123" to "0.123" if it's after the decimal point. \$\endgroup\$ChrisW– ChrisW2014年03月20日 18:01:22 +00:00Commented Mar 20, 2014 at 18:01
After reading the specification of the input ...
- Optional sign
- One or more digits
- Optional decimal with one or more digits
- Optional exponent with
- Optional sign
- one or more digits
... instead of a single for loop, it might have been clearer (easier to see the mapping from requirements to implementation) to have a succession of 3 for
loops.
It's not completely clear from the specification what the behaviour of "12." should be. "12." is accepted as a valid number by the C++ compiler in source code. This assert succeeds:
assert("12.", 12., "");
... but is missing from the set of test cases in the OP.
Incorrect functionality preventing creation of -0.0
.
-0.0
is a legitimate result of strtod()
. Although -0.0
and +0.0
have the same arithmetic value, +0.0 == -0.0
, they differ in sign.
// if (signedResult == '-' && result != 0) result = -result;
if (signedResult == '-') result = -result;
If you would like a test to assert if the result is +0.0
or -0.0
, consider memcmp()
or What operations and functions on +0.0 and -0.0 give different arithmetic results?
double pz = 0.0;
double nz = -0.0;
assert(memcmp(&test_result, &pz, sizeof pz) == 0); // test if canonically the same as +0.0
assert(memcmp(&test_result, &nz, sizeof nz) == 0); // test if canonically the same as -0.0
-
\$\begingroup\$ So I guess that also implies that this assert is wrong:
assert("-0", 0, "");
... with your change, maybe the assert would need to beassert("-0", -0.0, "");
. \$\endgroup\$ChrisW– ChrisW2014年09月30日 21:27:58 +00:00Commented Sep 30, 2014 at 21:27 -
1\$\begingroup\$ See update for testing +0.0, -0.0. Show in your asserts which if the test value and and which is the result of the computation. Your
assert()
function differs from what I expectassert(something that evalutes true or false)
. \$\endgroup\$chux– chux2014年09月30日 21:29:43 +00:00Commented Sep 30, 2014 at 21:29
Explore related questions
See similar questions with these tags.
double
well enough to do that comparison without a helpful header. \$\endgroup\$if (signedResult || (result != 0))
assuming that ifresult
is zero, no input has been read. \$\endgroup\$if (signedResult) result = - result;
The problem was that failed my assert that "-0" ought to return0.0
. Instead it returned-0.0
and I didn't know/understand why0.0 != -0.0
... therefore I added the&& (result != 0)
to the implementation, and didn't negate if result is 0. \$\endgroup\$