Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b9234d8

Browse files
implement libm::exp and its variants for i586 with inline assembly to fix precision issues
1 parent 82a32c6 commit b9234d8

File tree

9 files changed

+104
-2
lines changed

9 files changed

+104
-2
lines changed

‎libm-test/src/precision.rs‎

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,16 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
8383
Bn::Tgamma => 20,
8484
};
8585

86+
// These have a separate implementation on i586
87+
if cfg!(x86_no_sse) {
88+
match ctx.base_name {
89+
Bn::Exp => ulp = 1,
90+
Bn::Exp2 => ulp = 1,
91+
Bn::Exp10 => ulp = 1,
92+
_ => (),
93+
}
94+
}
95+
8696
// There are some cases where musl's approximation is less accurate than ours. For these
8797
// cases, increase the ULP.
8898
if ctx.basis == Musl {
@@ -124,8 +134,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
124134
Id::Asinh => ulp = 3,
125135
Id::Asinhf => ulp = 3,
126136
Id::Cbrt => ulp = 1,
127-
Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
128-
Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
129137
Id::Log1p | Id::Log1pf => ulp = 2,
130138
Id::Tan => ulp = 2,
131139
_ => (),

‎libm/src/math/arch/i586.rs‎

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,56 @@ pub fn floor(mut x: f64) -> f64 {
6060
}
6161
x
6262
}
63+
64+
macro_rules! x87exp {
65+
($float_ty:ident, $word_size:literal, $fn_name:ident, $load_op:literal) => {
66+
pub fn $fn_name(mut x: $float_ty) -> $float_ty { unsafe {
67+
core::arch::asm!(
68+
// Prepare the register stack as
69+
// ```
70+
// st(0) = y = x*log2(base)
71+
// st(1) = 1.0
72+
// st(2) = round(y)
73+
// ```
74+
concat!($load_op, " ", $word_size, " ptr [{x}]"),
75+
"fld1",
76+
"fld st(1)",
77+
"frndint",
78+
"fxch st(2)",
79+
80+
// Compare y with round(y) to determine if y is finite and
81+
// not an integer. If so, compute `exp2(y - round(y))` into
82+
// st(1). Otherwise skip ahead with `st(1) = 1.0`
83+
"fucom st(2)",
84+
"fstsw ax",
85+
"test ax, 0x4000",
86+
"jnz 2f",
87+
"fsub st(0), st(2)", // st(0) = y - round(y)
88+
"f2xm1", // st(0) = 2^st(0) - 1.0
89+
"fadd st(1), st(0)", // st(1) = 1 + st(0) = exp2(y - round(y))
90+
"2:",
91+
92+
// Finally, scale by `exp2(round(y))` and clear the stack.
93+
"fstp st(0)",
94+
"fscale",
95+
concat!("fstp ", $word_size, " ptr [{x}]"),
96+
"fstp st(0)",
97+
x = in(reg) &mut x,
98+
out("ax") _,
99+
out("st(0)") _, out("st(1)") _,
100+
out("st(2)") _, out("st(3)") _,
101+
out("st(4)") _, out("st(5)") _,
102+
out("st(6)") _, out("st(7)") _,
103+
options(nostack),
104+
);
105+
x
106+
}}
107+
};
108+
}
109+
110+
x87exp!(f32, "dword", x87_exp2f, "fld");
111+
x87exp!(f64, "qword", x87_exp2, "fld");
112+
x87exp!(f32, "dword", x87_exp10f, "fldl2t\nfmul");
113+
x87exp!(f64, "qword", x87_exp10, "fldl2t\nfmul");
114+
x87exp!(f32, "dword", x87_expf, "fldl2e\nfmul");
115+
x87exp!(f64, "qword", x87_exp, "fldl2e\nfmul");

‎libm/src/math/arch/mod.rs‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,8 @@ cfg_if! {
4848
pub use i586::{ceil, floor};
4949
}
5050
}
51+
cfg_if! {
52+
if #[cfg(x86_no_sse)] {
53+
pub use i586::{x87_exp10f, x87_exp10, x87_expf, x87_exp, x87_exp2f, x87_exp2};
54+
}
55+
}

‎libm/src/math/exp.rs‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
8383
/// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
8484
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
8585
pub fn exp(mut x: f64) -> f64 {
86+
select_implementation! {
87+
name: x87_exp,
88+
use_arch_required: x86_no_sse,
89+
args: x,
90+
}
91+
8692
let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
8793
let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149
8894

‎libm/src/math/exp10.rs‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ const P10: &[f64] = &[
99
/// Calculates 10 raised to the power of `x` (f64).
1010
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
1111
pub fn exp10(x: f64) -> f64 {
12+
select_implementation! {
13+
name: x87_exp10,
14+
use_arch_required: x86_no_sse,
15+
args: x,
16+
}
17+
1218
let (mut y, n) = modf(x);
1319
let u: u64 = n.to_bits();
1420
/* fabs(n) < 16 without raising invalid on nan */

‎libm/src/math/exp10f.rs‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ const P10: &[f32] = &[
99
/// Calculates 10 raised to the power of `x` (f32).
1010
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
1111
pub fn exp10f(x: f32) -> f32 {
12+
select_implementation! {
13+
name: x87_exp10f,
14+
use_arch_required: x86_no_sse,
15+
args: x,
16+
}
17+
1218
let (mut y, n) = modff(x);
1319
let u = n.to_bits();
1420
/* fabsf(n) < 8 without raising invalid on nan */

‎libm/src/math/exp2.rs‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,12 @@ static TBL: [u64; TBLSIZE * 2] = [
324324
/// Calculate `2^x`, that is, 2 raised to the power `x`.
325325
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
326326
pub fn exp2(mut x: f64) -> f64 {
327+
select_implementation! {
328+
name: x87_exp2,
329+
use_arch_required: x86_no_sse,
330+
args: x,
331+
}
332+
327333
let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64;
328334
let p1 = f64::from_bits(0x3fe62e42fefa39ef);
329335
let p2 = f64::from_bits(0x3fcebfbdff82c575);

‎libm/src/math/exp2f.rs‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ static EXP2FT: [u64; TBLSIZE] = [
7575
/// Calculate `2^x`, that is, 2 raised to the power `x`.
7676
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
7777
pub fn exp2f(mut x: f32) -> f32 {
78+
select_implementation! {
79+
name: x87_exp2f,
80+
use_arch_required: x86_no_sse,
81+
args: x,
82+
}
83+
7884
let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
7985
let p1 = f32::from_bits(0x3f317218);
8086
let p2 = f32::from_bits(0x3e75fdf0);

‎libm/src/math/expf.rs‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */
3232
/// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
3333
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
3434
pub fn expf(mut x: f32) -> f32 {
35+
select_implementation! {
36+
name: x87_expf,
37+
use_arch_required: x86_no_sse,
38+
args: x,
39+
}
40+
3541
let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
3642
let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */
3743
let mut hx = x.to_bits();

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /