aarch64: Implement fma* and fms* - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
authorpcpa <paulo.cesar.pereira.de.andrade@gmail.com>2023年04月18日 17:42:51 -0300
committerpcpa <paulo.cesar.pereira.de.andrade@gmail.com>2023年04月18日 17:42:51 -0300
commitf9c34dd53a5f5782891b5c87f710af05ad31adfb (patch)
tree668658df55d63d3017625188cda577a99b605edb
parent7a8ab7fd0b1ac5f7e6944f74d71a8bead2a9950f (diff)
downloadlightning-f9c34dd53a5f5782891b5c87f710af05ad31adfb.tar.gz
aarch64: Implement fma* and fms*
While what is "really" important is fmar*, aarch64 would benefit from the other variants as well. fms* needs to negate the result, because the FMSUB instruction actually provides "r0 = r3 + (-r2) * r1".
Diffstat
-rw-r--r--lib/jit_aarch64-fpu.c 55
-rw-r--r--lib/jit_aarch64.c 20
2 files changed, 75 insertions, 0 deletions
diff --git a/lib/jit_aarch64-fpu.c b/lib/jit_aarch64-fpu.c
index 74d4829..8ca39cf 100644
--- a/lib/jit_aarch64-fpu.c
+++ b/lib/jit_aarch64-fpu.c
@@ -31,6 +31,8 @@
# define A64_FABS 0x1e20c000
# define A64_FNEG 0x1e214000
# define A64_FSQRT 0x1e21c000
+# define A64_FMADD 0x1f000000
+# define A64_FMSUB 0x1f008000
# define A64_FCVTS 0x1e224000
# define A64_FCVTD 0x1e22c000
# define A64_FMUL 0x1e200800
@@ -61,6 +63,10 @@
# define FNEGD(Rd,Rn) osvv_(A64_FNEG,1,Rd,Rn)
# define FSQRTS(Rd,Rn) osvv_(A64_FSQRT,0,Rd,Rn)
# define FSQRTD(Rd,Rn) osvv_(A64_FSQRT,1,Rd,Rn)
+# define FMADDS(Rd,Rn,Rm,Ra) osvvvv(A64_FMADD,0,Rd,Rn,Rm,Ra)
+# define FMADDD(Rd,Rn,Rm,Ra) osvvvv(A64_FMADD,1,Rd,Rn,Rm,Ra)
+# define FMSUBS(Rd,Rn,Rm,Ra) osvvvv(A64_FMSUB,0,Rd,Rn,Rm,Ra)
+# define FMSUBD(Rd,Rn,Rm,Ra) osvvvv(A64_FMSUB,1,Rd,Rn,Rm,Ra)
# define FADDS(Rd,Rn,Rm) osvvv(A64_FADD,0,Rd,Rn,Rm)
# define FADDD(Rd,Rn,Rm) osvvv(A64_FADD,1,Rd,Rn,Rm)
# define FADDV(Rd,Rn,Rm) osvvv(A64_FADD,0,Rd,Rn,Rm)
@@ -73,6 +79,9 @@
# define osvvv(Op,Sz,Rd,Rn,Rm) _osvvv(_jit,Op,Sz,Rd,Rn,Rm)
static void _osvvv(jit_state_t*,jit_int32_t,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
+# define osvvvv(Op,Sz,Rd,Rn,Rm,Ra) _osvvvv(_jit,Op,Sz,Rd,Rn,Rm,Ra)
+static void _osvvvv(jit_state_t*,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define osvv_(Op,Sz,Rd,Rn) _osvv_(_jit,Op,Sz,Rd,Rn)
static void _osvv_(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
@@ -108,6 +117,10 @@ static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define absr_f(r0,r1) FABSS(r0,r1)
# define negr_f(r0,r1) FNEGS(r0,r1)
# define sqrtr_f(r0,r1) FSQRTS(r0,r1)
+# define fmar_f(r0,r1,r2,r3) FMADDS(r0,r1,r2,r3)
+# define fmsr_f(r0,r1,r2,r3) _fmsr_f(_jit,r0,r1,r2,r3)
+static void _fmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define extr_f(r0,r1) SCVTFS(r0,r1)
# define ldr_f(r0,r1) _ldr_f(_jit,r0,r1)
static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -231,6 +244,10 @@ static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define absr_d(r0,r1) FABSD(r0,r1)
# define negr_d(r0,r1) FNEGD(r0,r1)
# define sqrtr_d(r0,r1) FSQRTD(r0,r1)
+# define fmar_d(r0,r1,r2,r3) FMADDD(r0,r1,r2,r3)
+# define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3)
+static void _fmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define extr_d(r0,r1) SCVTFD(r0,r1)
# define ldr_d(r0,r1) _ldr_d(_jit,r0,r1)
static void _ldr_d(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -356,6 +373,26 @@ _osvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz,
}
static void
+_osvvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz,
+ jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm, jit_int32_t Ra)
+{ /* Build one instruction word from Op, Sz and four register fields, then pass it to ii() */
+ instr_t i;
+ assert(!(Rd & ~0x1f)); /* each register number must fit in 5 bits */
+ assert(!(Rn & ~0x1f));
+ assert(!(Rm & ~0x1f));
+ assert(!(Ra & ~0x1f));
+ assert(!(Sz & ~0x3)); /* size selector must fit in 2 bits */
+ assert(!(Op & ~0xff008000)); /* Op may only have bits 31-24 and bit 15 set */
+ i.w = Op; /* start from the base opcode; the fields below are filled in on top of it */
+ i.size.b = Sz;
+ i.Rd.b = Rd;
+ i.Rn.b = Rn;
+ i.Rm.b = Rm;
+ i.Ra.b = Ra;
+ ii(i.w); /* presumably appends the word to the code buffer -- ii() is defined elsewhere */
+}
+
+static void
_osvv_(jit_state_t *_jit, jit_int32_t Op,
jit_int32_t Sz, jit_int32_t Rd, jit_int32_t Rn)
{
@@ -483,6 +520,15 @@ fopi(mul)
fopi(div)
static void
+_fmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* Single precision fms: FMSUB alone provides r0 = r3 + (-r2) * r1, so the result is negated afterwards */
+ FMSUBS(r0, r1, r2, r3); /* r0 = r3 - r1 * r2 */
+ negr_f(r0, r0); /* r0 = r1 * r2 - r3; NOTE(review): FNMSUB may give this in one instruction -- confirm */
+}
+
+static void
_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
@@ -710,6 +756,15 @@ dopi(mul)
dopi(div)
static void
+_fmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* Double precision fms: FMSUB alone provides r0 = r3 + (-r2) * r1, so the result is negated afterwards */
+ FMSUBD(r0, r1, r2, r3); /* r0 = r3 - r1 * r2 */
+ negr_d(r0, r0); /* r0 = r1 * r2 - r3; NOTE(review): FNMSUB may give this in one instruction -- confirm */
+}
+
+static void
_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c
index 3345494..afef726 100644
--- a/lib/jit_aarch64.c
+++ b/lib/jit_aarch64.c
@@ -1658,6 +1658,16 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
+ case jit_code_fmar_f:
+ fmar_f(rn(node->u.w), rn(node->v.q.l),
+ rn(node->v.q.h), rn(node->w.w));
+ case jit_code_fmai_f:
+ break;
+ case jit_code_fmsr_f:
+ fmsr_f(rn(node->u.w), rn(node->v.q.l),
+ rn(node->v.q.h), rn(node->w.w));
+ case jit_code_fmsi_f:
+ break;
case_rr(ext, _f);
case_rr(ld, _f);
case_rw(ld, _f);
@@ -1753,6 +1763,16 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
+ case jit_code_fmar_d:
+ fmar_d(rn(node->u.w), rn(node->v.q.l),
+ rn(node->v.q.h), rn(node->w.w));
+ case jit_code_fmai_d:
+ break;
+ case jit_code_fmsr_d:
+ fmsr_d(rn(node->u.w), rn(node->v.q.l),
+ rn(node->v.q.h), rn(node->w.w));
+ case jit_code_fmsi_d:
+ break;
case_rr(ext, _d);
case_rr(ld, _d);
case_rw(ld, _d);
generated by cgit v1.2.3 (git 2.46.0) at 2025年09月29日 10:08:55 +0000

AltStyle によって変換されたページ (->オリジナル) /