C-style double expression parser

Question 1

The purpose of this code is to parse any expression as could appear in C that involves doubles or functions that involve only doubles.

I have written parsers before:

This is an upgraded version that uses more descriptive variable names and structures instead of parallel arrays. This version can also handle functions of varying arity. I was even planning on doing short string optimization here but had to skip that because reasons.

#include <math.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
typedef struct Symbol {
 unsigned arity, len;
 const char *name;
 union {
 double
 func0,
 (*func1)(double),
 (*func2)(double, double),
 (*func3)(double, double, double);
 };
} Symbol;
static int compar(const void *const restrict strp, const void *const restrict p) {
 const unsigned len = ((const Symbol *)p)->len;
 const char
 *const str = *(const char *const *)strp,
 *const name = ((const Symbol *)p)->name;
 const int cmp = memcmp(str, name, len);
 return cmp
 ? cmp
 : isalnum((unsigned char)str[len]) || str[len] == '_'
 ? str[len]-name[len]
 : (*(const char **)strp += len, 0);
}
#define SYMBOL(a, n)\
{\
 .arity = a,\
 .len = sizeof(#n)-1,\
 .name = #n,\
 .func##a = n\
}
static const Symbol symbols[] = {
 SYMBOL(0, M_1_PI),
 SYMBOL(0, M_2_PI),
 SYMBOL(0, M_2_SQRTPI),
 SYMBOL(0, M_E),
 SYMBOL(0, M_LN10),
 SYMBOL(0, M_LN2),
 SYMBOL(0, M_LOG10E),
 SYMBOL(0, M_LOG2E),
 SYMBOL(0, M_PI),
 SYMBOL(0, M_PI_2),
 SYMBOL(0, M_PI_4),
 SYMBOL(0, M_SQRT1_2),
 SYMBOL(0, M_SQRT2),
 SYMBOL(1, acos),
 SYMBOL(1, acosh),
 SYMBOL(1, asin),
 SYMBOL(1, asinh),
 SYMBOL(1, atan),
 SYMBOL(2, atan2),
 SYMBOL(1, atanh),
 SYMBOL(1, cbrt),
 SYMBOL(1, ceil),
 SYMBOL(1, cos),
 SYMBOL(1, cosh),
 SYMBOL(2, copysign),
 SYMBOL(1, erf),
 SYMBOL(1, erfc),
 SYMBOL(1, exp),
 SYMBOL(1, exp2),
 SYMBOL(1, expm1),
 SYMBOL(1, fabs),
 SYMBOL(2, fdim),
 SYMBOL(1, floor),
 SYMBOL(3, fma),
 SYMBOL(2, fmax),
 SYMBOL(2, fmin),
 SYMBOL(2, fmod),
 SYMBOL(2, hypot),
 SYMBOL(1, lgamma),
 SYMBOL(1, log),
 SYMBOL(1, log10),
 SYMBOL(1, log1p),
 SYMBOL(1, log2),
 SYMBOL(1, logb),
 SYMBOL(2, nextafter),
 SYMBOL(2, pow),
 SYMBOL(2, remainder),
 SYMBOL(1, round),
 SYMBOL(1, sin),
 SYMBOL(1, sinh),
 SYMBOL(1, sqrt),
 SYMBOL(1, tan),
 SYMBOL(1, tanh),
 SYMBOL(1, tgamma),
 SYMBOL(1, trunc)
};
static double term(const char **);
static double expr(const char **const str, const unsigned level) {
 *str += level != 0;
 for (double val = term(str);;) {
 while (isspace((unsigned char)**str))
 ++*str;
 switch (**str) {
 case '*':
 if (level > 2)
 break;
 val *= expr(str, 2);
 continue;
 case '/':
 if (level > 2)
 break;
 val /= expr(str, 2);
 continue;
 case '+':
 if (level > 1)
 break;
 val += expr(str, 1);
 continue;
 case '-':
 if (level > 1)
 break;
 val -= expr(str, 1);
 continue;
 }
 return val;
 }
}
static double term(const char **const str) {
 while (isspace((unsigned char)**str))
 ++*str;
 {
 char *end;
 const double val = strtod(*str, &end);
 if (*str != end) {
 *str = end;
 return val;
 }
 }
 if (isalpha((unsigned char)**str)) {
 const Symbol *const symbol = bsearch(str, symbols, sizeof(symbols)/sizeof(Symbol), sizeof(Symbol), compar);
 if (symbol) {
 if (!symbol->arity)
 return symbol->func0;
 while (isspace((unsigned char)**str))
 ++*str;
 if (*(*str)++ == '(') {
 double vals[symbol->arity];
 for (unsigned i = 0;;) {
 vals[i] = expr(str, 0);
 if (++i >= symbol->arity) {
 if (*(*str)++ == ')') switch (symbol->arity) {
 case 1:
 return symbol->func1(vals[0]);
 case 2:
 return symbol->func2(vals[0], vals[1]);
 case 3:
 return symbol->func3(vals[0], vals[1], vals[2]);
 }
 } else if (*(*str)++ == ',')
 continue;
 break;
 }
 }
 }
 } else switch (*(*str)++) {
 case '+':
 return term(str);
 case '-':
 return -term(str);
 case '(':
 {
 const double val = expr(str, 0);
 if (*(*str)++ == ')')
 return val;
 }
 }
 return NAN;
}
#include <stdio.h>
int main(int argc, char **argv) {
 for (int arg = 1; arg < argc; ++arg)
 if (printf("%g\n", expr(&(const char *){argv[arg]}, 0)) < 0)
 return EXIT_FAILURE;
 return EXIT_SUCCESS;
}

Question 2

Bug

memcmp(str, name, len); lets memcmp() compare the first len bytes of the data pointed to by str and name, yet some of those are pointers to a string less than len in size. What code needs is strncmp() (or perhaps even strcmp()) to not compare past the end of the string.

Bug

The *(const char **)strp += len relies on bsearch() to stop calling cmp() once a return value of 0 occurs. Although this is common, bsearch()is not specified that way. Instead the calling code should advance the pointer, not the compare function.

Pedantic Bug

str[len]-name[len] in compar() should be an unsigned char compare to well sort symbols that have a character outside the [0...CHAR_MAX] range. Yet since, presently, symbols[] only employs ASCII characters, it does not make a difference.

Non-standard macros

M_PI and its 13 friends are not part of the C standard. See Using M_PI. Consider

#ifndef M_PI
 #define M_PI 3.1415926535897932384626433832795
#endif

Lack of comments

Functions deserve at least a little description to indicate its role.

Consider long double

Consider using the most precise/widest range floating point type available.

Use a more precise output

Rather than only 6 digits, use the precision of the type:

// printf("%g\n", some_double)
printf("%.*g\n", DBL_DIG, some_double)

Missing break;

case '(': { .. deserves a break; at the end of the case or a comment indicating fall-through was desired.

double expr() missing return value;

TBD code needed to quiet "Problem description: No return, in function returning non-void".

Minor:

Excessive use of const object

object would simplify code presentation in many places where there is only a few lines of code.

Example:

 // const double val = expr(str, 0);
 double val = expr(str, 0);
 if (*(*str)++ == ')')
 return val;

Format to presentation width

Use an auto-formatter and present to the display width (e.g. ~82)

 1 2 3 4 5 6 7 8 9
1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
// const Symbol *const symbol = bsearch(str, symbols, sizeof(symbols)/sizeof(Symbol), sizeof(Symbol), compar);
 const Symbol *const symbol = bsearch(str, symbols, 
 sizeof(symbols)/sizeof(Symbol), sizeof(Symbol), compar);

Note the () are not needed with sizeof object.

Avoid names that differ only in case

// v----v v----v
const Symbol *const symbol

Vertical spacing

Between functions, types, consider a blank line.

Question 3

Oh, I tried to eliminate the excessively long lines, but looks like I missed one.

Question 4

@user16217248 Tip: use an auto-formatter. Do not manually format code.

Question 5

I'm not quite sure what you mean by making str[len]-name[len] an unsigned char. You mean like (unsigned char)(str[len]-name[len])? I don't know what that solves. Also the double expr() misses a return at the end because the 'end' is not reachable. The loop runs forever until the return statement at the end of the loop body is reached.

Question 6

@user16217248 When char is signed, str[len] is of one sign (say +50) and name[len] is the other sign (say -60), str[len]-name[len] is positive (110), yet if the strings are accessed via unsigned char *, str[len]-name[len] is like 50 - (256-60) with a negative result. Which is correct for searching a sorted list? strcmp() and memcmp() perform as if in the 2nd case. You code does not experience this issue as the functions all have ASCII names. Yet if π was added to to the calculator, we may have an issue.

Question 7

Well I could replace the return at the end of the loop with break and just put the return at the end of the function.

chux chux 36.2k2 gold badges43 silver badges96 bronze badges · Accepted Answer · 2023-10-16 21:38:39Z

Bug

memcmp(str, name, len); lets memcmp() compare the first len bytes of the data pointed to by str and name, yet some of those are pointers to a string less than len in size. What code needs is strncmp() (or perhaps even strcmp()) to not compare past the end of the string.

Bug

The *(const char **)strp += len relies on bsearch() to stop calling cmp() once a return value of 0 occurs. Although this is common, bsearch()is not specified that way. Instead the calling code should advance the pointer, not the compare function.

Pedantic Bug

str[len]-name[len] in compar() should be an unsigned char compare to well sort symbols that have a character outside the [0...CHAR_MAX] range. Yet since, presently, symbols[] only employs ASCII characters, it does not make a difference.

Non-standard macros

M_PI and its 13 friends are not part of the C standard. See Using M_PI. Consider

#ifndef M_PI
 #define M_PI 3.1415926535897932384626433832795
#endif

Lack of comments

Functions deserve at least a little description to indicate its role.

Consider long double

Consider using the most precise/widest range floating point type available.

Use a more precise output

Rather than only 6 digits, use the precision of the type:

// printf("%g\n", some_double)
printf("%.*g\n", DBL_DIG, some_double)

Missing break;

case '(': { .. deserves a break; at the end of the case or a comment indicating fall-through was desired.

double expr() missing return value;

TBD code needed to quiet "Problem description: No return, in function returning non-void".

Minor:

Excessive use of const object

object would simplify code presentation in many places where there is only a few lines of code.

Example:

 // const double val = expr(str, 0);
 double val = expr(str, 0);
 if (*(*str)++ == ')')
 return val;

Format to presentation width

Use an auto-formatter and present to the display width (e.g. ~82)

 1 2 3 4 5 6 7 8 9
1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
// const Symbol *const symbol = bsearch(str, symbols, sizeof(symbols)/sizeof(Symbol), sizeof(Symbol), compar);
 const Symbol *const symbol = bsearch(str, symbols, 
 sizeof(symbols)/sizeof(Symbol), sizeof(Symbol), compar);

Note the () are not needed with sizeof object.

Avoid names that differ only in case

// v----v v----v
const Symbol *const symbol

Vertical spacing

Between functions, types, consider a blank line.

Oh, I tried to eliminate the excessively long lines, but looks like I missed one.
@user16217248 Tip: use an auto-formatter. Do not manually format code.
I'm not quite sure what you mean by making str[len]-name[len] an unsigned char. You mean like (unsigned char)(str[len]-name[len])? I don't know what that solves. Also the double expr() misses a return at the end because the 'end' is not reachable. The loop runs forever until the return statement at the end of the loop body is reached.
@user16217248 When char is signed, str[len] is of one sign (say +50) and name[len] is the other sign (say -60), str[len]-name[len] is positive (110), yet if the strings are accessed via unsigned char *, str[len]-name[len] is like 50 - (256-60) with a negative result. Which is correct for searching a sorted list? strcmp() and memcmp() perform as if in the 2nd case. You code does not experience this issue as the functions all have ASCII names. Yet if π was added to to the calculator, we may have an issue.
Well I could replace the return at the end of the loop with break and just put the return at the end of the function.

Stack Exchange Network

C-style double expression parser

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Hot Network Questions

C-style double expression parser

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Related

Hot Network Questions