Commit b9234d8

committed

implement libm::exp and its variants for i586 with inline assembly to fix precision issues

1 parent 82a32c6, commit b9234d8 (copy full SHA for b9234d8)

File tree

9 files changed

+104

-2

lines changed

libm-test/src
- precision.rs
libm/src/math
- arch
  - i586.rs
  - mod.rs
- exp.rs
- exp10.rs
- exp10f.rs
- exp2.rs
- exp2f.rs
- expf.rs

9 files changed

+104

-2

lines changed

`‎libm-test/src/precision.rs‎`

Lines changed: 10 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -83,6 +83,16 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {`
`83`	`83`	`Bn::Tgamma => 20,`
`84`	`84`	`};`
`85`	`85`
	`86`	`+ // These have a separate implementation on i586`
	`87`	`+ if cfg!(x86_no_sse) {`
	`88`	`+ match ctx.base_name {`
	`89`	`+ Bn::Exp => ulp = 1,`
	`90`	`+ Bn::Exp2 => ulp = 1,`
	`91`	`+ Bn::Exp10 => ulp = 1,`
	`92`	`+ _ => (),`
	`93`	`+ }`
	`94`	`+ }`
	`95`	`+`
`86`	`96`	`// There are some cases where musl's approximation is less accurate than ours. For these`
`87`	`97`	`// cases, increase the ULP.`
`88`	`98`	`if ctx.basis == Musl {`
`@@ -124,8 +134,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {`
`124`	`134`	`Id::Asinh => ulp = 3,`
`125`	`135`	`Id::Asinhf => ulp = 3,`
`126`	`136`	`Id::Cbrt => ulp = 1,`
`127`		`- Id::Exp10 \| Id::Exp10f => ulp = 1_000_000,`
`128`		`- Id::Exp2 \| Id::Exp2f => ulp = 10_000_000,`
`129`	`137`	`Id::Log1p \| Id::Log1pf => ulp = 2,`
`130`	`138`	`Id::Tan => ulp = 2,`
`131`	`139`	`_ => (),`

`‎libm/src/math/arch/i586.rs‎`

Lines changed: 53 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -60,3 +60,56 @@ pub fn floor(mut x: f64) -> f64 {`
`60`	`60`	`}`
`61`	`61`	`x`
`62`	`62`	`}`
	`63`	`+`
	`64`	`+macro_rules! x87exp {`
	`65`	`+ ($float_ty:ident, $word_size:literal, $fn_name:ident, $load_op:literal) => {`
	`66`	`+ pub fn $fn_name(mut x: $float_ty) -> $float_ty { unsafe {`
	`67`	`+ core::arch::asm!(`
	`68`	`+ // $load_op pushes y (x itself for base 2, x times the fldl2t/fldl2e log2(base) constant otherwise); then arrange the x87 stack as`
	`69`	+ // ```
	`70`	`+ // st(0) = y = x*log2(base)`
	`71`	`+ // st(1) = 1.0`
	`72`	`+ // st(2) = round(y)`
	`73`	+ // ```
	`74`	`+ concat!($load_op, " ", $word_size, " ptr [{x}]"),`
	`75`	`+ "fld1",`
	`76`	`+ "fld st(1)",`
	`77`	`+ "frndint",`
	`78`	`+ "fxch st(2)",`
	`79`	`+`
	`80`	`+ // Compare y with round(y): fucom sets C3 (mask 0x4000 of the status word copied into ax) when the two are equal or unordered, i.e. when y is an integer, infinite or NaN.`
	`81`	+ // Only when C3 is clear (y is finite and not an integer) compute `exp2(y - round(y))` into
	`82`	+ // st(1); frndint keeps |y - round(y)| below 1, inside the range f2xm1 accepts. Otherwise skip ahead with `st(1) = 1.0`
	`83`	`+ "fucom st(2)",`
	`84`	`+ "fstsw ax",`
	`85`	`+ "test ax, 0x4000",`
	`86`	`+ "jnz 2f",`
	`87`	`+ "fsub st(0), st(2)", // st(0) = y - round(y)`
	`88`	`+ "f2xm1", // st(0) = 2^st(0) - 1.0`
	`89`	`+ "fadd st(1), st(0)", // st(1) = 1 + st(0) = exp2(y - round(y))`
	`90`	`+ "2:",`
	`91`	`+`
	`92`	+ // Finally, pop the scratch value in st(0), scale the new st(0) by `exp2(round(y))` with fscale, store the result back through {x}, and pop the leftover round(y) so the stack is empty again.
	`93`	`+ "fstp st(0)",`
	`94`	`+ "fscale",`
	`95`	`+ concat!("fstp ", $word_size, " ptr [{x}]"),`
	`96`	`+ "fstp st(0)",`
	`97`	`+ x = in(reg) &mut x,`
	`98`	`+ out("ax") _,`
	`99`	`+ out("st(0)") _, out("st(1)") _,`
	`100`	`+ out("st(2)") _, out("st(3)") _,`
	`101`	`+ out("st(4)") _, out("st(5)") _,`
	`102`	`+ out("st(6)") _, out("st(7)") _,`
	`103`	`+ options(nostack),`
	`104`	`+ );`
	`105`	`+ x`
	`106`	`+ }}`
	`107`	`+ };`
	`108`	`+}`
	`109`	`+`
	`110`	`+x87exp!(f32, "dword", x87_exp2f, "fld");`
	`111`	`+x87exp!(f64, "qword", x87_exp2, "fld");`
	`112`	`+x87exp!(f32, "dword", x87_exp10f, "fldl2t\nfmul");`
	`113`	`+x87exp!(f64, "qword", x87_exp10, "fldl2t\nfmul");`
	`114`	`+x87exp!(f32, "dword", x87_expf, "fldl2e\nfmul");`
	`115`	`+x87exp!(f64, "qword", x87_exp, "fldl2e\nfmul");`