musl - musl - an implementation of the standard library for Linux-based systems

index : musl
musl - an implementation of the standard library for Linux-based systems
summary refs log tree commit diff
diff options
context:
space:
mode:
author: Szabolcs Nagy <nsz@port70.net> 2013-08-15 10:56:57 +0000
committer: Szabolcs Nagy <nsz@port70.net> 2013-08-15 10:56:57 +0000
commit: 31c5fb80b9eae86f801be4f46025bc6532a554c5 (patch)
tree: c5d0912699727ebd96bb5194334ee4dd33dc9c5b
parent: 1b3973fb43fbef80dab1dfc9c788783e78ab5043 (diff)
download: musl-31c5fb80b9eae86f801be4f46025bc6532a554c5.tar.gz
math: fix x86 asin, atan, exp, log1p to raise underflow
underflow is raised by an inexact subnormal float store; since subnormal operations are slow, check the underflow flag and skip the store if it's already raised
Diffstat
-rw-r--r--src/math/i386/asin.s 23
-rw-r--r--src/math/i386/atan.s 10
-rw-r--r--src/math/i386/atanf.s 12
-rw-r--r--src/math/i386/exp.s 37
-rw-r--r--src/math/i386/log1p.s 9
-rw-r--r--src/math/i386/log1pf.s 10
6 files changed, 98 insertions, 3 deletions
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
index 932c7542..a9f691bf 100644
--- a/src/math/i386/asin.s
+++ b/src/math/i386/asin.s
@@ -2,7 +2,18 @@
.type asinf,@function
asinf:
flds 4(%esp)
- jmp 1f
+ mov 4(%esp),%eax
+ add %eax,%eax
+ cmp $0x01000000,%eax
+ jae 1f
+ # subnormal x, return x with underflow
+ fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fld %st(0)
+ fmul %st(1)
+ fstps 4(%esp)
+2: ret
.global asinl
.type asinl,@function
@@ -14,6 +25,16 @@ asinl:
.type asin,@function
asin:
fldl 4(%esp)
+ mov 8(%esp),%eax
+ add %eax,%eax
+ cmp $0x00200000,%eax
+ jae 1f
+ # subnormal x, return x with underflow
+ fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fsts 4(%esp)
+2: ret
1: fld %st(0)
fld1
fsub %st(0),%st(1)
diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
index 7e28b395..d73137b2 100644
--- a/src/math/i386/atan.s
+++ b/src/math/i386/atan.s
@@ -2,6 +2,16 @@
.type atan,@function
atan:
fldl 4(%esp)
+ mov 8(%esp),%eax
+ add %eax,%eax
+ cmp $0x00200000,%eax
+ jb 1f
fld1
fpatan
ret
+ # subnormal x, return x with underflow
+1: fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fsts 4(%esp)
+2: ret
diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
index 3cd40233..8caddefa 100644
--- a/src/math/i386/atanf.s
+++ b/src/math/i386/atanf.s
@@ -2,6 +2,18 @@
.type atanf,@function
atanf:
flds 4(%esp)
+ mov 4(%esp),%eax
+ add %eax,%eax
+ cmp $0x01000000,%eax
+ jb 1f
fld1
fpatan
ret
+ # subnormal x, return x with underflow
+1: fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fld %st(0)
+ fmul %st(1)
+ fstps 4(%esp)
+2: ret
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index e3b42af5..e5f54588 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -2,7 +2,18 @@
.type expm1f,@function
expm1f:
flds 4(%esp)
- jmp 1f
+ mov 4(%esp),%eax
+ add %eax,%eax
+ cmp $0x01000000,%eax
+ jae 1f
+ # subnormal x, return x with underflow
+ fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fld %st(0)
+ fmul %st(1)
+ fstps 4(%esp)
+2: ret
.global expm1l
.type expm1l,@function
@@ -14,10 +25,32 @@ expm1l:
.type expm1,@function
expm1:
fldl 4(%esp)
+ mov 8(%esp),%eax
+ add %eax,%eax
+ cmp $0x00200000,%eax
+ jae 1f
+ # subnormal x, return x with underflow
+ fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fsts 4(%esp)
+2: ret
1: fldl2e
fmulp
+ mov $0xc2820000,%eax
+ push %eax
+ flds (%esp)
+ pop %eax
+ fucomp %st(1)
+ fnstsw %ax
+ sahf
fld1
- fld %st(1)
+ jb 1f
+ # x*log2e < -65, return -1 without underflow
+ fstp %st(1)
+ fchs
+ ret
+1: fld %st(1)
fabs
fucom %st(1)
fnstsw %ax
diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
index 9971e53c..6b6929c7 100644
--- a/src/math/i386/log1p.s
+++ b/src/math/i386/log1p.s
@@ -7,9 +7,18 @@ log1p:
fldl 4(%esp)
cmp $0x3fd28f00,%eax
ja 1f
+ cmp $0x00100000,%eax
+ jb 2f
fyl2xp1
ret
1: fld1
faddp
fyl2x
ret
+ # subnormal x, return x with underflow
+2: fnstsw %ax
+ and $16,%ax
+ jnz 1f
+ fsts 4(%esp)
+ fstp %st(1)
+1: ret
diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
index 2680a8a6..c0bcd30f 100644
--- a/src/math/i386/log1pf.s
+++ b/src/math/i386/log1pf.s
@@ -7,9 +7,19 @@ log1pf:
flds 4(%esp)
cmp $0x3e940000,%eax
ja 1f
+ cmp $0x00800000,%eax
+ jb 2f
fyl2xp1
ret
1: fld1
faddp
fyl2x
ret
+ # subnormal x, return x with underflow
+2: fnstsw %ax
+ and $16,%ax
+ jnz 1f
+ fxch
+ fmul %st(1)
+ fstps 4(%esp)
+1: ret
generated by cgit v1.2.1 (git 2.18.0) at 2025-10-05 13:52:31 +0000

AltStyle によって変換されたページ (->オリジナル) /