From K&R:
Extend
atof
to handle scientific notation of the form 123.45e-6 where a floating-point number may be followed bye
orE
and an optionally signed exponent.
I only added the part that handles the scientific notation. It compiles and works fine. I think it's way too long and could be shortened. I also don't like the names.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#define MAX_SIZE 20
double atof(const char *s){
int i;
for(i = 0; isspace(s[i]); ++i);
/*skip white space*/
int sign;
sign = (s[i] == '-')? -1 : 1; /*The sign of the number*/
if(s[i] == '-' || s[i] == '+'){
++i;
}
double value;
for(value = 0.0; isdigit(s[i]); ++i){
value = value * 10.0 + (s[i] - '0');
}
if(s[i] == '.'){
++i;
}
double power;
for(power = 1.0; isdigit(s[i]); ++i){
value = value * 10.0 + (s[i] - '0');
power *= 10.0;
}
if(s[i] == 'e' || s[i] == 'E'){
++i;
}
else{
return sign * value/power;
}
int powersign; /*The sign following the E*/
powersign = (s[i] == '-')? -1 : 1;
if(s[i] == '-' || s[i] == '+'){
++i;
}
int power2; /*The number following the E*/
for(power2 = 0; isdigit(s[i]); ++i){
power2 = power2 * 10 + (s[i] - '0');
}
if(powersign == -1){
while(power2 != 0){
power *= 10;
--power2;
}
}
else{
while(power2 != 0){
power /= 10;
--power2;
}
}
return sign * value/power;
}
int main(void){
char string[MAX_SIZE];
fgets(string, sizeof(string), stdin);
printf("%.9f", atof(string));
return 0;
}
2 Answers 2
Thanks for including a test program - that's always valuable!
However, I'm going to change it, to parse a battery of test cases instead of reading from stdin:
#include <stdio.h>
int main(void)
{
static const char *const strings[] = {
/* these should parse fully */
"12",
"12.0",
"08", /* not octal! */
"+12.34",
".34",
"\t \n2.",
"1e0",
"1e+0",
"1e-0",
"1.e4",
".1e-4",
"-5e006",
"-5e+16",
"-.05",
"-.0",
"-1e6",
/* these should parse only the initial part */
"5c5",
"10ee5",
"0x06", /* not hex! */
"--1" ,
"-+1" ,
"1e--4" ,
"-1e.4",
"1e 4",
"1e-g",
"", "foobar", /* both 0 */
" e5", /* also 0 */
"-1e6",
/* overflow/underflow */
"1e500000",
"1e-500000",
"-1e500000",
"-1e-500000",
};
static const int max = sizeof strings / sizeof strings[0];
for (int i = 0; i < max; ++i)
printf("%20s = > %.9g\n", strings[i], extended_atof(strings[i]));
}
(I changed the function name to extended_atof()
so as to be safely distinct from the standard library atof()
.)
Your implementation passes all these tests. Now we can look at refactoring.
Remove duplication
The things that we parse in multiple places are:
- optional sign
+
or-
- digit sequences
So perhaps we can refactor each of those into a function? Instead of using an integer index into the supplied string, I prefer to just move the string pointer, and eliminate the need for i
:
/* return true for positive, false for negative,
and advance `*s` to next position */
static bool parse_sign(const char **s)
{
switch (**s) {
case '-': ++*s; return false;
case '+': ++*s; return true;
default: return true;
}
}
Let's make use of that in the function:
double extended_atof(const char *s)
{
/*skip white space*/
while (isspace(*s))
++s;
int sign = parse_sign(&s) ? 1 : -1; /*The sign of the number*/
double value = 0.0;
while (isdigit(*s))
value = value * 10.0 + (*s++ - '0');
if (*s == '.') {
++s;
}
double power = 1.0;
while (isdigit(*s)) {
value = value * 10.0 + (*s++ - '0');
power *= 10.0;
}
if (tolower(*s) == 'e') {
++s;
} else {
return sign * value/power;
}
bool powersign = parse_sign(&s); /*The sign following the E*/
int power2 = 0.0; /*The number following the E*/
while (isdigit(*s))
power2 = power2 * 10.0 + (*s++ - '0');
if (powersign) {
while (power2 != 0) {
power /= 10;
--power2;
}
} else {
while (power2 != 0) {
power *= 10;
--power2;
}
}
return sign * value/power;
}
It's slightly shorter, and it still passes all the tests.
Let's see if we can read digit strings in a function, and replace the three places we do that. We'll make it update a count of how many digits wore parsed, so we don't lose leading zeros in the fractional part:
double extended_atof(const char *s)
{
/*skip white space*/
while (isspace(*s))
++s;
int sign = parse_sign(&s) ? 1 : -1; /*The sign of the number*/
double value = parse_digits(&s, NULL);
if (*s == '.') {
++s;
int d; /* digits in fraction */
double fraction = parse_digits(&s, &d);
while (d--)
fraction /= 10.0;
value += fraction;
}
value *= sign;
if (tolower(*s) == 'e') {
++s;
} else {
return value;
}
bool powersign = parse_sign(&s); /*The sign following the E*/
int power2 = parse_digits(&s, NULL); /*The number following the E*/
double power = 1.0;
if (powersign) {
while (power2 != 0) {
power /= 10;
--power2;
}
} else {
while (power2 != 0) {
power *= 10;
--power2;
}
}
return value/power;
}
Tests still pass; what's next?
if (tolower(*s) == 'e') {
++s;
} else {
return value;
}
This can be reversed, and if we're returning, it doesn't matter what we do to s
:
if (tolower(*s++) != 'e')
return value;
Here's some near-duplicate blocks:
double power = 1.0;
if (powersign) {
while (power2 != 0) {
power /= 10;
--power2;
}
} else {
while (power2 != 0) {
power *= 10;
--power2;
}
}
Dividing by 10 is the same as multiplying by 0.1, so we can move the test into the loop:
double power = 1.0;
while (power2 != 0) {
power *= powersign ? 0.1 : 10;
--power2;
}
We could go further, and capture powersign ? 0.1 : 10
into a variable. We can also eliminate the power
variable from here, and multiply value
directly:
const double exponentsign = parse_sign(&s) ? 10. : .1;
int exponent = parse_digits(&s, NULL);
while (exponent--)
value *= exponentsign;
Final version
Here's what I finished up with:
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
/* return true for positive, false for negative,
and advance `*s` to next position */
static bool parse_sign(const char **const s)
{
switch (**s) {
case '-': ++*s; return false;
case '+': ++*s; return true;
default: return true;
}
}
/* return decimal value of digits,
advancing `*s` to the next character,
and storing the number of digits read into *count */
static double parse_digits(const char **const s, int *const count)
{
double value = 0.0;
int c = 0;
while (isdigit(**s)) {
value = value * 10.0 + (*(*s)++ - '0');
++c;
}
if (count)
*count = c;
return value;
}
double extended_atof(const char *s)
{
/*skip white space*/
while (isspace(*s))
++s;
const bool valuesign = parse_sign(&s); /* sign of the number */
double value = parse_digits(&s, NULL);
if (*s == '.') {
int d; /* number of digits in fraction */
++s;
double fraction = parse_digits(&s, &d);
while (d--)
fraction /= 10.0;
value += fraction;
}
if (!valuesign)
value = -value;
if (tolower(*s++) != 'e')
return value;
/* else, we have an exponent; parse its sign and value */
const double exponentsign = parse_sign(&s) ? 10. : .1;
int exponent = parse_digits(&s, NULL);
while (exponent--)
value *= exponentsign;
return value;
}
/* Test program */
#include <stdio.h>
int main(void)
{
static const char *const strings[] = {
/* these should parse fully */
"12",
"12.0",
"08", /* not octal! */
"+12.34",
".34",
"\t \n2.",
"1e0",
"1e+0",
"1e-0",
"1.e4",
".1e-4",
"-5e006",
"-5e+16",
"-.05",
"-.0",
"-1e6",
/* these should parse only the initial part */
"5c5",
"10ee5",
"0x06", /* not hex! */
"--1" ,
"-+1" ,
"1e--4" ,
"-1e.4",
"1e 4",
"1e-g",
"", "foobar", /* both 0 */
" e5", /* also 0 */
"-1e6",
/* overflow/underflow */
"1e500000",
"1e-500000",
"-1e500000",
"-1e-500000",
};
static const int max = sizeof strings / sizeof strings[0];
for (int i = 0; i < max; ++i)
printf("%20s = > %.9g\n", strings[i], extended_atof(strings[i]));
}
There's still an opportunity for a small improvement: an extremely long fractional part could overflow double
(this problem existed in your original). Instead of returning a large value from parse_int()
, you could consider always returning a fractional value in the range [0...1), and use the number of digits to scale up the integer parts. Then we'd just end up with lost precision at the lower end. That would look like:
static double parse_digits(const char **const s, int *const count)
{
double value = 0.0;
double increment = 0.1;
int c = 0;
while (isdigit(**s)) {
value += increment * (*(*s)++ - '0');
increment /= 10;
++c;
}
if (count)
*count = c;
return value;
}
The corresponding uses would be:
double extended_atof(const char *s)
{
/*skip white space*/
while (isspace(*s))
++s;
int d; /* number of digits */
const bool valuesign = parse_sign(&s); /* sign of the number */
double value = parse_digits(&s, &d);
while (d--)
value *= 10;
if (*s == '.') {
++s;
double fraction = parse_digits(&s, NULL);
value += fraction;
}
if (!valuesign)
value = -value;
if (tolower(*s++) != 'e')
return value;
/* else, we have an exponent; parse its sign and value */
const double exponentsign = parse_sign(&s) ? 10. : .1;
double exponent_f = parse_digits(&s, &d);
while (d--)
exponent_f *= 10;
unsigned long exponent = exponent_f;
while (exponent-->0)
value *= exponentsign;
return value;
}
-
1\$\begingroup\$ Some corer cases: 1)
isspace(*s)
is UB if*s < 0
(and notEOF
). Same fortolower()
. Could useisspace((unsigned char) *s)
2)value
computation andexponent_f *= 10;
can overflow (or UF to 0) beforevalue *= exponentsign;
even thought the result would otherwise be in range."0.17e309"
,"2.22e-308"
. 3)parse_digits()
introduces lots of rounding errors (weakness), builds upvalue
from MSdigits to LSD - which is numerically less accurate than the reverse. Yet does handle not OF readily (strength). Still overall a goodatof()
\$\endgroup\$chux– chux2017年03月25日 03:36:15 +00:00Commented Mar 25, 2017 at 3:36 -
\$\begingroup\$ Dividing by 10.0 is not the same as multiplying with 0.1. \$\endgroup\$Roland Illig– Roland Illig2017年03月26日 11:10:14 +00:00Commented Mar 26, 2017 at 11:10
-
\$\begingroup\$ @Roland, in general I agree,
*.1
and/10.
aren't necessarily identical, but in this specific case (decimal->float conversion), the difference is significant. \$\endgroup\$Toby Speight– Toby Speight2017年03月27日 07:46:41 +00:00Commented Mar 27, 2017 at 7:46 -
\$\begingroup\$ @chux: several good points - thank you! I'll see if I can find time to re-work a solution with those facts taken into account. \$\endgroup\$Toby Speight– Toby Speight2017年03月27日 07:48:21 +00:00Commented Mar 27, 2017 at 7:48
I think that the worst case of duplication that you need to remove from your code is writing another set of lines for reading the floating number after e (/E).
Even I am just a beginner and doing K&R right now. Here's what I thought when doing the exercise:
For extending the program to handle scientific notations the first thing that I require is to read another floating point number after the character e/E. The code for doing this is already present in the function. Which makes it obvious that we are to reuse that code somehow. I thought that no extra lines of code should should be written for implementing this functionality.
I found that using recursion along with math.h library shortened and simplified the code (particularly the part used for reading the number following e/E) quite considerably.
Here's the code that I wrote:
#include<stdio.h>
#include<ctype.h>
#include<math.h>
#include<string.h>
double atof(char *);
int main(void)
{
char num[20];
scanf("%19s", num);
double number=atof(num);
printf("\n%lf", number);
return 0;
}
double atof(char *num)
{
double val=0.0;
int place=1;
double expo=1.0;
int i=0;
int sign=1;
for(; isspace(num[i]); i++); //skip spaces
sign=(num[i]=='-')?-1:1; //determine sign
if(num[i]=='-'||num[i]=='+')
++i;
while(isdigit(num[i])){ //digits before decimal
val=(val*10)+(num[i]-'0');
++i;
}
if(num[i]=='.') //skip decimal point if present
++i;
while(isdigit(num[i])){ //digits after decimal
val=val*10+(num[i]-'0');
place*=10;
i++;
}
if(num[i]=='e' || num[i]=='E'){ //the extended part for scientific notations
++i;
expo=pow(10,atof(num+i));
}
return (sign*val*expo)/(place);
}
-
1\$\begingroup\$ Same issues as with this comment.
atof(&num[i])
is curious. I'd expectexpo=pow(10,atoi(&num[i]));
This does not pass"1E10"
(onlye
passes) \$\endgroup\$chux– chux2017年03月25日 03:40:31 +00:00Commented Mar 25, 2017 at 3:40 -
\$\begingroup\$ @chux didn't get what you mean by
atoi(&num[i]))
\$\endgroup\$Nityesh Agarwal– Nityesh Agarwal2017年03月26日 09:59:42 +00:00Commented Mar 26, 2017 at 9:59 -
\$\begingroup\$ This answer's
expo=pow(10,atof(num+i));
callsatof()
, which returnsdouble
, to parse a string encoding an integer. As the exponent is an integer I'd expect code to useatoi()
rather thanatof()
. \$\endgroup\$chux– chux2017年03月26日 20:05:08 +00:00Commented Mar 26, 2017 at 20:05 -
\$\begingroup\$ @chux Isn't it better if the program handles floating point exponents as well? \$\endgroup\$Nityesh Agarwal– Nityesh Agarwal2017年03月27日 17:02:01 +00:00Commented Mar 27, 2017 at 17:02
-
\$\begingroup\$ Better - perhaps, yet certainly unexpected. So this code would handle
"0.5e0.4e0.3e0.2e0.1"
as 11.8... I'd expect 0.5 or error return of 0.0. IAC,atof()
error behavior is undefined, so this answer's handling of unusual strings is not wrong. \$\endgroup\$chux– chux2017年03月27日 17:16:52 +00:00Commented Mar 27, 2017 at 17:16
stdlib.h
? So do you need to implement that yourself at all? \$\endgroup\$strtod
is implemented. (or perhaps not, if it is full of platform-dependent trickery!) \$\endgroup\$