Output
\20ドル\$ float64s with absolute value between 1 and 2 inclusive whose sum has the largest possible relative error. We can assume the true sum is not zero.
The sum is to be computed from left to right in a loop in the obvious fashion.
The relative error is abs(true sum - computed sum)/true sum.
Score
The relative error you achieve. Larger is better.
This is different Find 10 float64s that give the least accurate sum which asks about absolute error. Good answers to that question will not be good answers to this question.
2 Answers 2
63x
#include <stdio.h>
#include <initializer_list>
static double aim = 0;
const double eps = 2.220446049250313e-16;
// 1.0000000000000000 1.9999999999999998 1.9999999999999996 1.9999999999999996 1.9999999999999993 -1.0000000000000004 -1.9999999999999993 -1.9999999999999996 -1.9999999999999993 -1.9999999999999993 => 14.0000000000000000
void f() {double a6=0,a7=0,a8=0,a9=0,a10=0,b6=0,b7=0,b8=0,b9=0,b10=0;
double a1 = 1;
for (double a2: {2., 2-eps})
for (double a3: {a2, a2-eps})
for (double a4: {a3, a3-eps})
for (double a5: {a4, a4-eps})
for (double a6: {a5, a5-eps})
for (double a7: {a6, a6-eps})
for (double a8: {a7, a7-eps})
for (double a9: {a8, a8-eps})
for (double a10: {a9, a9-eps})
for (double b1=-1; b1>-1-100*eps; b1-=eps)
for (double x=-2; x<-2+100*eps; x+=eps)
for (double b2: {x, x+eps, x+2*eps})
for (double b3: {x, x+eps, x+2*eps})
for (double b4: {x, x+eps, x+2*eps})
for (double b5: {x, x+eps, x+2*eps})
for (double b6: {x, x+eps, x+2*eps})
for (double b7: {x, x+eps, x+2*eps})
for (double b8: {x, x+eps, x+2*eps})
for (double b9: {x, x+eps, x+2*eps})
for (double b10: {x, x+eps, x+2*eps}) {{
long double m = (long double) a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10 + b1 + b2 + b3 + b4 + b5 + b6 + b7 + b8 + b9 + b10;
if (m == 0) goto r;
double q = (a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10 + b1 + b2 + b3 + b4 + b5 + b6 + b7 + b8 + b9 + b10 - m)/m;
if (q<0) q=-q;
if (q>aim) {
printf("%.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f => %.16f\n", a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, aim=q); }
}r:;
}
}
int main() {
f();
}
1.0000000000000000 1.9999999999999998 1.9999999999999996 1.9999999999999996 1.9999999999999993 1.9999999999999991 1.9999999999999991 1.9999999999999991 1.9999999999999989 1.9999999999999987 -1.0000000000000160 -1.9999999999999973 -1.9999999999999973 -1.9999999999999973 -1.9999999999999973 -1.9999999999999973 -1.9999999999999976 -1.9999999999999978 -1.9999999999999976 -1.9999999999999978 => 63.0000000000000000
Rust, Relative error 66.0
#![feature(float_next_up_down)]
fn main() {
let e = 2.0f64.powi(-52);
let x1 = 1.0f64;
let x2 = 2.0f64 - e;
let x3 = 2.0f64 - e * 2.0;
let x4 = 2.0f64 - e * 4.0;
let x5 = 2.0f64 - e * 8.0;
let y1 = -1.875f64 - e * 8.0;
let y2 = -1.875f64 - e * 4.0;
let y3 = -1.875f64 - e * 2.0;
let y4 = -1.875f64 - e * 1.0;
let y5 = -2.0f64;
let y6 = -2.0f64 + e * 65.0;
let v = vec![
x1, x2, x3, x3, x4, x4, x4, x4, x5, x5,
y1, y2, y2, y2, y2, y3, y3, y4, y5, y6
];
let errors = vec![
x1 - 1.0, x2 - 2.0, x3 - 2.0, x3 - 2.0, x4 - 2.0,
x4 - 2.0, x4 - 2.0, x4 - 2.0, x5 - 2.0, x5 - 2.0,
y1 + 1.875, y2 + 1.875, y2 + 1.875, y2 + 1.875, y2 + 1.875,
y3 + 1.875, y3 + 1.875, y4 + 1.875, y5 + 2.0, y6 + 2.0
];
let mut sum = 0.0;
let mut true_sum = 0.0;
for i in 0..v.len() {
sum += v[i];
true_sum += errors[i];
}
println!("{:?}", v);
println!("True: {:.60}", true_sum);
println!("Got: {:.60}", sum);
println!("Rel.Error: {:.10}", (sum - true_sum).abs() / true_sum.abs());
}
Output:
[1.0, 1.9999999999999998, 1.9999999999999996, 1.9999999999999996,
1.9999999999999991, 1.9999999999999991, 1.9999999999999991,
1.9999999999999991, 1.9999999999999982, 1.9999999999999982,
-1.8750000000000018, -1.8750000000000009, -1.8750000000000009,
-1.8750000000000009, -1.8750000000000009, -1.8750000000000004,
-1.8750000000000004, -1.8750000000000002, -2.0, -1.9999999999999856]
True: -0.000000000000000222044604925031308084726333618164062500000000
Got: 0.000000000000014432899320127035025507211685180664062500000000
Rel.Error: 66.0000000000
Got a hint from l4m2's solution that I can keep increasing the error while coming back to e
.
Rust, Relative error 37.0
#![feature(float_next_up_down)]
fn main() {
let x1 = 2.0f64;
let x2 = x1.next_down(); // -1e
let x3 = x2.next_down(); // -2e
let x4 = x3.next_down().next_down(); // -4e
let x5 = x4.next_down().next_down().next_down().next_down(); // -8e
let y1 = -2.0f64;
let y2 = -2.0f64 + 2.0f64.powi(-52) * 36.0;
let v = vec![
x1, x2, x3, x3, x4, x4, x4, x4, x5, x5, // -37e
y1, y1, y1, y1, y1, y1, y1, y1, y1, y2
];
let errors = vec![
x1 - 2.0, x2 - 2.0, x3 - 2.0, x3 - 2.0, x4 - 2.0,
x4 - 2.0, x4 - 2.0, x4 - 2.0, x5 - 2.0, x5 - 2.0,
y1 + 2.0, y1 + 2.0, y1 + 2.0, y1 + 2.0, y1 + 2.0,
y1 + 2.0, y1 + 2.0, y1 + 2.0, y1 + 2.0, y2 + 2.0
];
let mut sum = 0.0;
let mut true_sum = 0.0;
for i in 0..20 {
sum += v[i];
true_sum += errors[i];
}
println!("{:?}", v);
println!("True: {:.60}", true_sum);
println!("Got: {:.60}", sum);
println!("Rel.Error: {:.10}", (sum - true_sum).abs() / true_sum.abs());
}
Output:
[2.0, 1.9999999999999998, 1.9999999999999996, 1.9999999999999996, 1.9999999999999991,
1.9999999999999991, 1.9999999999999991, 1.9999999999999991, 1.9999999999999982,
1.9999999999999982, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0,
-1.999999999999992]
True: -0.000000000000000222044604925031308084726333618164062500000000
Got: 0.000000000000007993605777301127091050148010253906250000000000
Rel.Error: 37.0000000000
x.next_down()
(nightly feature) finds the closest f64 that is lower than x
.
The construction is similar to the absolute error version: accumulate large integer values with increasing amount of error. However, it is necessary to make the sum as small in magnitude as possible to amplify the relative error, so only the first 10 numbers are used to accumulate the error and the other 10 are used to cancel the integer part.
All f64 numbers between 1 and 2 inclusive are uniformly spaced at the units of 2^-52
, and rounding errors at larger ranges occur as its multiples, so it is impossible to make the true sum smaller than that in magnitude. It is thus the "unit of error" (e
in the comments in code). The true sum is -1e
while the calculated sum is +36e
, making the relative error exactly 37. (not percent; in percentage it would be 3700%!)
-
\$\begingroup\$ Its interesting the relative error is an integer. What would the max relative error be if the numbers were restricted between 0 and 1 in absolute value? \$\endgroup\$Simd– Simd2024年07月09日 04:14:45 +00:00Commented Jul 9, 2024 at 4:14
-
1
-
\$\begingroup\$
x1 = 1.0f64
andy5 = -2.0f64
each seem to have errors of zero. Is there a reason you can't build-in some extra error to these, too? \$\endgroup\$Dominic van Essen– Dominic van Essen2024年07月09日 06:48:52 +00:00Commented Jul 9, 2024 at 6:48 -
\$\begingroup\$ @DominicvanEssen The error of size
e
occurs only when the result of an operation is strictly greater than 2.x1
is the first term, so it is always exact. After addingy5
, the magnitude cannot be significantly greater than 2, but maybe I didn't try to squeezee
hard enough. \$\endgroup\$Bubbler– Bubbler2024年07月09日 08:06:31 +00:00Commented Jul 9, 2024 at 8:06 -
\$\begingroup\$ I want to understand how to calculate the true sum. While, i mean, we need the true sum before "sum", but after calculate "2.0f64 - e * 2.0". That's because "2.0f64 - e * 2.0" is not a part of sum calculating. So we need to calculate the the value after "2.0f64 - e * 2.0" first... \$\endgroup\$tsh– tsh2024年07月10日 02:12:15 +00:00Commented Jul 10, 2024 at 2:12