I am working on a polynomial algorithm which requires calculations in large numbers, up to e+38. I am using a 32-bit system with compiler support of 32 bits for long
/float
/double
. So far, by searching online, I've learned that single-precision floating points (FPs) can be used for double-precision FPs.
From this question asked by someone earlier (Emulate double
using 2 float
s), I found this paper which has the algorithm to work with double-precision FPs in GPUs. It is too confusing for me to implement in C. I just need four basic mathematical operations. Is there any way I could find an example for this which will help me understand it better?
Here is the code I am working on. It might have shortcomings I cannot see, but that is pretty much what I am trying to implement. In the algorithm, POLYNOMIAL_ORDER
should be able to go up to fourth order (can settle at third order if the standard deviation is smaller). A few things I am not sure about are:
- Whether procedures
make_float()
andmake_float()
are correct or not - Use of
make_float()
in the program
#define POLYNOMIAL_ORDER (3)
#define TC_TABLE_SIZE (14)
typedef struct vector_float2{
float x;
float y;
}float2;
typedef struct
{
float tc0;
float tc1;
float tc2;
float tc3;
}POLYNOMIALS;
typedef struct {
int16_t Temp;
int16_t Comp;
} TempCompPair;
volatile TempCompPair TCtable[TC_TABLE_SIZE] = {{22452,1651},
{25318,1444},
{28268,1133},
{31120,822},
{34027,511},
{36932,185},
{39770,-81},
{42685,-288},
{45531,-407},
{48425,-632},
{51401,-703},
{54460,-1143},
{57202,-1420},
{60027,-1652}};
POLYNOMIALS polynomials;
float matrix[TC_TABLE_SIZE][TC_TABLE_SIZE] = {0};
float average[TC_TABLE_SIZE] = {0};
float make_float(float x, float y)
{
return x+y;
}
float2 make_float2(float a, float b)
{
float2 f2 = {a,b};
return f2;
}
float2 quickTwoSum(float a, float b)
{
float s = a+b;
float e = b - (s - a);
float2 result = {s, e};
return result;
}
float2 twoSum(float a, float b)
{
volatile float s = a + b;
float v = s - a;
float e = (a - (s - v)) + (b - v);
float2 result = {s , e};
return result;
}
float2 df64_add(float2 a, float2 b)
{
float2 s,t;
s = twoSum(a.x, b.x);
t = twoSum(a.y, b.y);
s.y += t.x;
s = quickTwoSum(s.x, s.y);
s.y += t.y;
s = quickTwoSum(s.x, s.y);
return s;
}
float2 split(float a)
{
const float split = 4097; //(1<<12) + 1
float t = a *split;
float a_hi = t - (t - a);
float a_lo = a - a_hi;
float2 result = {a_hi, a_lo};
return result;
}
float2 twoProd(float a, float b)
{
float p = a*b;
float2 aS = split(a);
float2 bS = split(b);
float err = ((aS.x * bS.x - p)
+ aS.x * bS.y + aS.y * bS.x)
+ aS.y * bS.y;
float2 result = {p, err};
return result;
}
float2 df64_mult(float2 a, float2 b)
{
float2 p;
p = twoProd(a.x,b.x);
p.y += a.x * b.y;
p.y += a.y * b.x;
p = quickTwoSum(p.x,p.y);
return p;
}
float2 calculate_power(float base, int pow)
{
int i = 0;
float2 base_f2 = make_float2(base,0);
float2 result_f2 = {1,0};
if(pow == 0)
{
return result_f2;
}
if(pow > 0)
{
if(pow == 1)
{
return base_f2;
}
else
{
for(i = 0; i < pow; i++)
{
result_f2 = df64_mult(result_f2,base_f2);
}
return result_f2;
}
}
else
{
return result_f2;
//Mechanism for negative powers
}
}
void TComp_Polynomial()
{
int i;
int j;
int k;
int size;
float temp;
float2 sum = {0,0};
float2 result0 = {0,0};
float2 result1 = {0,0};
float x[TC_TABLE_SIZE];
float y[TC_TABLE_SIZE];
for(i = 0; i < TC_TABLE_SIZE; i++)
{
x[i] = (float) TCtable[i].Temp;
y[i] = (float) TCtable[i].Comp;
}
size = i;
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
sum.x = 0;
sum.y = 0;
for(k = 0; k < size; k++)
{
// Expression simplified below: **sum += pow(x[k],i+j)**
result0 = calculate_power(x[k], i+j);
sum = df64_add(result0,sum);
}
matrix[i][j] = make_float(sum.x,sum.y);
}
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
sum.x = 0;
sum.y = 0;
for(j = 0; j < size; j++)
{
// Expression simplified below: **sum += y[j] * pow(x[j],i)**
result0 = calculate_power(x[j], i);
result1 = df64_mult( result0 , make_float2(y[j],0) );
sum = df64_add(result1,sum);
}
average[i] = make_float(sum.x,sum.y);
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
if(j != i)
{
if(matrix[i][i]!= 0)
{
temp = matrix[j][i]/matrix[i][i];
}
for(k = i; k < POLYNOMIAL_ORDER; k++)
{
matrix[j][k] -= temp*matrix[i][k];
}
average[j] -= temp*average[i];
}
}
}
if(matrix[0][0] != 0)
{
polynomials.tc0 = average[0]/matrix[0][0];
}
if(matrix[1][1] != 0)
{
polynomials.tc1 = average[1]/matrix[1][1];
}
if(matrix[2][2] != 0)
{
polynomials.tc2 = average[2]/matrix[2][2];
}
if(matrix[3][3] != 0)
{
polynomials.tc3 = average[3]/matrix[3][3];
}
}
and then use the struct polynomials.tc0/1/2/3 in this expression:
// y = x^3 * d + x^2 * c + x^1 * b + a ;
double calculate_equation(uint16_t TEMP)
{
double Y;
if(POLYNOMIAL_ORDER == 1)
{
Y = polynomials.tc1*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 2)
{
Y = (polynomials.tc2 * (double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 3)
{
Y = ((polynomials.tc3 * (double)TEMP + polynomials.tc2)*(double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 4)
{
Y = (((polynomials.tc4 * (double)TEMP + polynomials.tc3)*(double)TEMP + polynomials.tc2)*(double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
return Y;
}
And standard deviation is calculated as follows:
//sqrt(sigma(error^2))
for(i = 0; i < TC_TABLE_SIZE; i++)
{
actual_comp[i] =(int) calculate_equation(TCtable[i].Temp);
error[i] = TCtable[i].Comp - actual_comp[i] ;
error_sqr += error[i]*error[i];
printf("%u\t%d\t\t%e\n", TCtable[i].Temp, TCtable[i].Comp, actual_comp[i] );
}
error_sqrt = sqrt(error_sqr);
1 Answer 1
I was able to implement this code without using double precision as the calculations were in the range of Float. Here's my implementation, let me know if I can optimize it better.
typedef struct
{ int64_t tc0;
int64_t tc1;
int64_t tc2;
int64_t tc3;
int64_t tc4;
}POLYNOMIALS;
POLYNOMIALS polynomials = {0,0,0,0,0};
int16_t TempCompIndex;
int64_t x[TC_TABLE_SIZE];
int64_t y[TC_TABLE_SIZE];
float matrix[POLYNOMIAL_ORDER+1][POLYNOMIAL_ORDER+1] = {0};
float average[POLYNOMIAL_ORDER+1] = {0};
void TComp_Polynomial()
{
int i;
int j;
int k;
int size;
float temp;
float sum = 0;
float powr = 0;
float prod;
int64_t x[TC_TABLE_SIZE];
int64_t y[TC_TABLE_SIZE];
for(i = 0; i < TC_TABLE_SIZE; i++)
{
x[i] = (int64_t) TCtable[i].Temp;
y[i] = (int64_t) TCtable[i].Comp<<PRECISION;
printf("x: %lld, y:%lld\n",x[i],y[i]);
}
size = i;
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
sum = 0;
powr = 0;
for(k = 0; k < size; k++)
{
//printf("x[%d]: %ld, i: %d ,j: %d ", k, x[k],i,j);
powr = pow(x[k],i+j);
//printf("Power: %f, sum: %f\n ",powr,sum);
sum += powr;
//printf("%f\r\n",powr);
//printf("sum: %lf\n",sum );
}
matrix[i][j] = sum;
printf("sum: %g\n",sum);
}
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
sum = 0;
powr = 0;
for(j = 0; j < size; j++)
{
//sum += y[j] * pow(x[j],i)
//printf("sum: %lf, y[%d]: %lf, x[%d]: %lf^%d ",sum,j,y[j], i, x[j],i);
//printf("x[%d]:%lld ^ %d\t",j,x[j],i);
powr = (float) pow(x[j],i);
printf("powr: %f\t",powr);
prod = (float) y[j] * powr;
printf("prod:%f \t %lld \t", prod,y[j]);
sum += (float) prod;
printf("sum: %f \n",sum);
}
average[i] = sum;
//printf("#Avg: %f\n",average[i]);
}
printf("\n\n");
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
if(j != i)
{
if(matrix[i][i]!= 0)
{
//printf("matrix%d%d: %g / matrix%d%d: %g =\t ",j,i,matrix[j][i],i,i,matrix[i][i]);
temp = matrix[j][i]/matrix[i][i];
//printf("Temp: %g\n",temp);
}
for(k = i; k < POLYNOMIAL_ORDER; k++)
{
matrix[j][k] -= temp*matrix[i][k];
//printf("matrix[%d][%d]:%g, %g, matrix[%d][%d]:%g\n",j,k,matrix[j][k], temp,i,k,matrix[i][k]);
}
//printf("\n\n");
//print_matrix();
printf("\n\n");
//printf("avg%d: %g\ttemp: %g\tavg%d: %g\n\n",j,average[j],temp,i,average[i]);
average[j] -= temp*average[i];
printf("#Avg%d:%g\n",j,average[j]);
//print_average();
}
}
}
print_matrix();
print_average();
/* Calculate polynomial Coefficients (n+1) based on the POLYNOMIAL_ORDER (n) */
#ifndef POLYNOMIAL_ORDER
#elif POLYNOMIAL_ORDER == 0
if(matrix[0][0] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
}
#elif POLYNOMIAL_ORDER == 1
if(matrix[1][1] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
}
#elif POLYNOMIAL_ORDER == 2
if(matrix[2][2] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
}
#elif POLYNOMIAL_ORDER == 3
if(matrix[3][3] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
polynomials.tc3 = (int64_t) (average[3]/matrix[3][3]);
}
#elif POLYNOMIAL_ORDER == 4
if(matrix[4][4] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
polynomials.tc3 = (int64_t) (average[3]/matrix[3][3]);
polynomials.tc4 = (int64_t) (average[4]/matrix[4][4]);
}
#endif
}
int16_t calculate_equation(uint16_t TEMP)
{
int64_t Y = 0;
int16_t TempComp = 0;
#ifndef POLYNOMIAL_ORDER
#elif POLYNOMIAL_ORDER == 0
Y = polynomials.tc0;
#elif POLYNOMIAL_ORDER == 1
Y = polynomials.tc1* ((int64_t)TEMP) + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 2
Y = (polynomials.tc2 * ((int64_t)TEMP) + polynomials.tc1)*(int64_t)TEMP + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 3
Y = ((polynomials.tc3 * ((int64_t)TEMP) + polynomials.tc2)*((int64_t)TEMP) + polynomials.tc1)*((int64_t)TEMP) + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 4
Y = (((polynomials.tc4 * (int64_t)TEMP + polynomials.tc3)*(int64_t)TEMP + polynomials.tc2)*(int64_t)TEMP + polynomials.tc1)*(int64_t)TEMP + polynomials.tc0;
#endif
TempComp = (int16_t) (Y>>PRECISION_BITS);
return TempComp;
}
void main(){
int16_t TempComp = 0;
TempCompValue = (int16_t) calculate_equation(Mon_Temp);
}
Note: Calculate_Equation() is being called once a second and it is required to not use float in order to avoid floating point arithmetic, hence I am using non-float variables in that function.
It is working right for me and haven't discovered any bug after initial testing. Thanks every one for taking interest in my post, if not the answer, got to learn some new techniques. And thanks @chux.
-
1\$\begingroup\$ (I think it better to post a (cross-linked) follow-up question.) Don't use
#if ID == 0 s0 #elif ID == 1 s0 s1 #elif ID == 2 s0 s1 s2 ...
- use#if ID <= 0 s0 #if ID <= 1 s1 #if ID <= 2 s2 ...#
annoying_amount_of_
endif
. \$\endgroup\$greybeard– greybeard2017年08月08日 19:18:07 +00:00Commented Aug 8, 2017 at 19:18 -
\$\begingroup\$ @greybeard: Thanks for the suggestion. Updated in my code. \$\endgroup\$Rishit– Rishit2017年08月11日 20:14:42 +00:00Commented Aug 11, 2017 at 20:14
bignum.h
\$\endgroup\$