lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat
-rw-r--r--check/fma.tst 206
-rw-r--r--check/lightning.c 12
-rw-r--r--include/lightning.h.in 21
-rw-r--r--lib/jit_aarch64-fpu.c 42
-rw-r--r--lib/jit_aarch64.c 34
-rw-r--r--lib/jit_alpha-fpu.c 60
-rw-r--r--lib/jit_alpha.c 34
-rw-r--r--lib/jit_arm-swf.c 60
-rw-r--r--lib/jit_arm-vfp.c 150
-rw-r--r--lib/jit_arm.c 54
-rw-r--r--lib/jit_hppa-fpu.c 74
-rw-r--r--lib/jit_hppa.c 34
-rw-r--r--lib/jit_ia64-fpu.c 24
-rw-r--r--lib/jit_ia64.c 34
-rw-r--r--lib/jit_loongarch-fpu.c 4
-rw-r--r--lib/jit_loongarch.c 34
-rw-r--r--lib/jit_mips-cpu.c 4
-rw-r--r--lib/jit_mips-fpu.c 142
-rw-r--r--lib/jit_mips.c 34
-rw-r--r--lib/jit_names.c 4
-rw-r--r--lib/jit_ppc-fpu.c 4
-rw-r--r--lib/jit_ppc.c 34
-rw-r--r--lib/jit_riscv-fpu.c 4
-rw-r--r--lib/jit_riscv.c 34
-rw-r--r--lib/jit_s390-fpu.c 86
-rw-r--r--lib/jit_s390.c 34
-rw-r--r--lib/jit_sparc-fpu.c 24
-rw-r--r--lib/jit_sparc.c 34
-rw-r--r--lib/jit_x86-sse.c 120
-rw-r--r--lib/jit_x86-x87.c 60
-rw-r--r--lib/jit_x86.c 91
-rw-r--r--lib/lightning.c 80
32 files changed, 1311 insertions, 355 deletions
diff --git a/check/fma.tst b/check/fma.tst
index 056511f..f87f709 100644
--- a/check/fma.tst
+++ b/check/fma.tst
@@ -211,6 +211,212 @@ fsi2:
calli @abort
dsi2:
+ /* Simple encoding test for all different registers */
+ movi_f %f0 0.0
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ movi_f %f3 4.0
+ fnmar_f %f0 %f1 %f2 %f3
+ beqi_f fna0 %f0 -10.0
+ calli @abort
+fna0:
+ movi_d %f0 0.0
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ movi_d %f3 6.0
+ fnmar_d %f0 %f1 %f2 %f3
+ beqi_d dna0 %f0 -26.0
+ calli @abort
+dna0:
+ movi_f %f0 0.0
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ movi_f %f3 4.0
+ fnmsr_f %f0 %f1 %f2 %f3
+ beqi_f fns0 %f0 -2.0
+ calli @abort
+fns0:
+ movi_d %f0 0.0
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ movi_d %f3 6.0
+ fnmsr_d %f0 %f1 %f2 %f3
+ beqi_d dns0 %f0 -14.0
+ calli @abort
+dns0:
+
+ /* Simple encoding test for result also first argument */
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ movi_f %f3 4.0
+ fnmar_f %f1 %f1 %f2 %f3
+ beqi_f fna1 %f1 -10.0
+ calli @abort
+fna1:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ movi_d %f3 6.0
+ fnmar_d %f1 %f1 %f2 %f3
+ beqi_d dna1 %f1 -26.0
+ calli @abort
+dna1:
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ movi_f %f3 4.0
+ fnmsr_f %f1 %f1 %f2 %f3
+ beqi_f fns1 %f1 -2.0
+ calli @abort
+fns1:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ movi_d %f3 6.0
+ fnmsr_d %f1 %f1 %f2 %f3
+ beqi_d dns1 %f1 -14.0
+ calli @abort
+dns1:
+
+ /* Simple encoding test for result also second argument */
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ movi_f %f3 4.0
+ fnmar_f %f2 %f1 %f2 %f3
+ beqi_f fna2 %f2 -10.0
+ calli @abort
+fna2:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ movi_d %f3 6.0
+ fnmar_d %f2 %f1 %f2 %f3
+ beqi_d dna2 %f2 -26.0
+ calli @abort
+dna2:
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ movi_f %f3 4.0
+ fnmsr_f %f2 %f1 %f2 %f3
+ beqi_f fns2 %f2 -2.0
+ calli @abort
+fns2:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ movi_d %f3 6.0
+ fnmsr_d %f2 %f1 %f2 %f3
+ beqi_d dns2 %f2 -14.0
+ calli @abort
+dns2:
+
+ /* Simple encoding test for result also third argument */
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ movi_f %f3 4.0
+ fnmsr_f %f3 %f1 %f2 %f3
+ beqi_f fns3 %f3 -2.0
+ calli @abort
+fns3:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ movi_d %f3 6.0
+ fnmsr_d %f3 %f1 %f2 %f3
+ beqi_d dns3 %f3 -14.0
+ calli @abort
+dns3:
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ movi_f %f3 4.0
+ fnmar_f %f3 %f1 %f2 %f3
+ beqi_f fna3 %f3 -10.0
+ calli @abort
+fna3:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ movi_d %f3 6.0
+ fnmar_d %f3 %f1 %f2 %f3
+ beqi_d dna3 %f3 -26.0
+ calli @abort
+dna3:
+
+ /* Simple encoding test for all different registers (immediate addend forms) */
+ movi_f %f0 0.0
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ fnmai_f %f0 %f1 %f2 4.0
+ beqi_f fnai0 %f0 -10.0
+ calli @abort
+fnai0:
+ movi_d %f0 0.0
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ fnmai_d %f0 %f1 %f2 6.0
+ beqi_d dnai0 %f0 -26.0
+ calli @abort
+dnai0:
+ movi_f %f0 0.0
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ fnmsi_f %f0 %f1 %f2 4.0
+ beqi_f fnsi0 %f0 -2.0
+ calli @abort
+fnsi0:
+ movi_d %f0 0.0
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ fnmsi_d %f0 %f1 %f2 6.0
+ beqi_d dnsi0 %f0 -14.0
+ calli @abort
+dnsi0:
+
+ /* Simple encoding test for result also first argument (immediate addend forms) */
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ fnmai_f %f1 %f1 %f2 4.0
+ beqi_f fnai1 %f1 -10.0
+ calli @abort
+fnai1:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ fnmai_d %f1 %f1 %f2 6.0
+ beqi_d dnai1 %f1 -26.0
+ calli @abort
+dnai1:
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ fnmsi_f %f1 %f1 %f2 4.0
+ beqi_f fnsi1 %f1 -2.0
+ calli @abort
+fnsi1:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ fnmsi_d %f1 %f1 %f2 6.0
+ beqi_d dnsi1 %f1 -14.0
+ calli @abort
+dnsi1:
+
+ /* Simple encoding test for result also second argument (immediate addend forms) */
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ fnmai_f %f2 %f1 %f2 4.0
+ beqi_f fnai2 %f2 -10.0
+ calli @abort
+fnai2:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ fnmai_d %f2 %f1 %f2 6.0
+ beqi_d dnai2 %f2 -26.0
+ calli @abort
+dnai2:
+ movi_f %f1 2.0
+ movi_f %f2 3.0
+ fnmsi_f %f2 %f1 %f2 4.0
+ beqi_f fnsi2 %f2 -2.0
+ calli @abort
+fnsi2:
+ movi_d %f1 4.0
+ movi_d %f2 5.0
+ fnmsi_d %f2 %f1 %f2 6.0
+ beqi_d dnsi2 %f2 -14.0
+ calli @abort
+dnsi2:
+
prepare
pushargi ok
finishi @puts
diff --git a/check/lightning.c b/check/lightning.c
index ea5b270..f04ecd8 100644
--- a/check/lightning.c
+++ b/check/lightning.c
@@ -476,6 +476,8 @@ static void absr_f(void); static void absi_f(void);
static void sqrtr_f(void); static void sqrti_f(void);
static void fmar_f(void); static void fmai_f(void);
static void fmsr_f(void); static void fmsi_f(void);
+static void fnmar_f(void); static void fnmai_f(void);
+static void fnmsr_f(void); static void fnmsi_f(void);
static void ltr_f(void); static void lti_f(void);
static void ler_f(void); static void lei_f(void);
static void eqr_f(void); static void eqi_f(void);
@@ -535,6 +537,8 @@ static void absr_d(void); static void absi_d(void);
static void sqrtr_d(void); static void sqrti_d(void);
static void fmar_d(void); static void fmai_d(void);
static void fmsr_d(void); static void fmsi_d(void);
+static void fnmar_d(void); static void fnmai_d(void);
+static void fnmsr_d(void); static void fnmsi_d(void);
static void ltr_d(void); static void lti_d(void);
static void ler_d(void); static void lei_d(void);
static void eqr_d(void); static void eqi_d(void);
@@ -875,6 +879,8 @@ static instr_t instr_vector[] = {
entry(sqrtr_f), entry(sqrti_f),
entry(fmar_f), entry(fmai_f),
entry(fmsr_f), entry(fmsi_f),
+ entry(fnmar_f), entry(fnmai_f),
+ entry(fnmsr_f), entry(fnmsi_f),
entry(ltr_f), entry(lti_f),
entry(ler_f), entry(lei_f),
entry(eqr_f), entry(eqi_f),
@@ -933,6 +939,8 @@ static instr_t instr_vector[] = {
entry(sqrtr_d), entry(sqrti_d),
entry(fmar_d), entry(fmai_d),
entry(fmsr_d), entry(fmsi_d),
+ entry(fnmar_d), entry(fnmai_d),
+ entry(fnmsr_d), entry(fnmsi_d),
entry(ltr_d), entry(lti_d),
entry(ler_d), entry(lei_d),
entry(eqr_d), entry(eqi_d),
@@ -1933,6 +1941,8 @@ entry_fr_fr(absr_f) entry_fr_fm(absi_f)
entry_fr_fr(sqrtr_f) entry_fr_fm(sqrti_f)
entry_fr_fr_fr_fr(fmar_f) entry_fr_fr_fr_fm(fmai_f)
entry_fr_fr_fr_fr(fmsr_f) entry_fr_fr_fr_fm(fmsi_f)
+entry_fr_fr_fr_fr(fnmar_f) entry_fr_fr_fr_fm(fnmai_f)
+entry_fr_fr_fr_fr(fnmsr_f) entry_fr_fr_fr_fm(fnmsi_f)
entry_ir_fr_fr(ltr_f) entry_ir_fr_fm(lti_f)
entry_ir_fr_fr(ler_f) entry_ir_fr_fm(lei_f)
entry_ir_fr_fr(eqr_f) entry_ir_fr_fm(eqi_f)
@@ -1991,6 +2001,8 @@ entry_fr_fr(absr_d) entry_fr_fm(absi_d)
entry_fr_fr(sqrtr_d) entry_fr_fm(sqrti_d)
entry_fr_fr_fr_fr(fmar_d) entry_fr_fr_fr_dm(fmai_d)
entry_fr_fr_fr_fr(fmsr_d) entry_fr_fr_fr_dm(fmsi_d)
+entry_fr_fr_fr_fr(fnmar_d) entry_fr_fr_fr_dm(fnmai_d)
+entry_fr_fr_fr_fr(fnmsr_d) entry_fr_fr_fr_dm(fnmsi_d)
entry_ir_fr_fr(ltr_d) entry_ir_fr_dm(lti_d)
entry_ir_fr_fr(ler_d) entry_ir_fr_dm(lei_d)
entry_ir_fr_fr(eqr_d) entry_ir_fr_dm(eqi_d)
diff --git a/include/lightning.h.in b/include/lightning.h.in
index 41b96e2..3c049ed 100644
--- a/include/lightning.h.in
+++ b/include/lightning.h.in
@@ -1197,6 +1197,19 @@ typedef enum {
#define jit_fmsi_d(u,v,w,x) _jit_fmsi_d(_jit, u, v, w, x)
jit_code_fmsr_d, jit_code_fmsi_d,
+#define jit_fnmar_f(u,v,w,x) jit_new_node_wqw(jit_code_fnmar_f, u, v, w, x)
+#define jit_fnmai_f(u,v,w,x) _jit_fnmai_f(_jit, u, v, w, x)
+ jit_code_fnmar_f, jit_code_fnmai_f,
+#define jit_fnmsr_f(u,v,w,x) jit_new_node_wqw(jit_code_fnmsr_f, u, v, w, x)
+#define jit_fnmsi_f(u,v,w,x) _jit_fnmsi_f(_jit, u, v, w, x)
+ jit_code_fnmsr_f, jit_code_fnmsi_f,
+#define jit_fnmar_d(u,v,w,x) jit_new_node_wqw(jit_code_fnmar_d, u, v, w, x)
+#define jit_fnmai_d(u,v,w,x) _jit_fnmai_d(_jit, u, v, w, x)
+ jit_code_fnmar_d, jit_code_fnmai_d,
+#define jit_fnmsr_d(u,v,w,x) jit_new_node_wqw(jit_code_fnmsr_d, u, v, w, x)
+#define jit_fnmsi_d(u,v,w,x) _jit_fnmsi_d(_jit, u, v, w, x)
+ jit_code_fnmsr_d, jit_code_fnmsi_d,
+
jit_code_last_code
} jit_code_t;
@@ -1324,6 +1337,10 @@ extern void _jit_fmai_f(jit_state_t*,
jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
extern void _jit_fmsi_f(jit_state_t*,
jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
+extern void _jit_fnmai_f(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
+extern void _jit_fnmsi_f(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
extern jit_node_t *_jit_arg_d(jit_state_t*);
extern void _jit_getarg_d(jit_state_t*, jit_fpr_t, jit_node_t*);
@@ -1341,6 +1358,10 @@ extern void _jit_fmai_d(jit_state_t*,
jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
extern void _jit_fmsi_d(jit_state_t*,
jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
+extern void _jit_fnmai_d(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
+extern void _jit_fnmsi_d(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
#define jit_get_reg(s) _jit_get_reg(_jit,s)
extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t);
diff --git a/lib/jit_aarch64-fpu.c b/lib/jit_aarch64-fpu.c
index 8ca39cf..51f40ce 100644
--- a/lib/jit_aarch64-fpu.c
+++ b/lib/jit_aarch64-fpu.c
@@ -33,6 +33,8 @@
# define A64_FSQRT 0x1e21c000
# define A64_FMADD 0x1f000000
# define A64_FMSUB 0x1f008000
+# define A64_FNMADD 0x1f200000
+# define A64_FNMSUB 0x1f208000
# define A64_FCVTS 0x1e224000
# define A64_FCVTD 0x1e22c000
# define A64_FMUL 0x1e200800
@@ -63,10 +65,18 @@
# define FNEGD(Rd,Rn) osvv_(A64_FNEG,1,Rd,Rn)
# define FSQRTS(Rd,Rn) osvv_(A64_FSQRT,0,Rd,Rn)
# define FSQRTD(Rd,Rn) osvv_(A64_FSQRT,1,Rd,Rn)
+/* Vd = Va + Vn*Vm */
# define FMADDS(Rd,Rn,Rm,Ra) osvvvv(A64_FMADD,0,Rd,Rn,Rm,Ra)
# define FMADDD(Rd,Rn,Rm,Ra) osvvvv(A64_FMADD,1,Rd,Rn,Rm,Ra)
+/* Vd = Va + (-Vn)*Vm */
# define FMSUBS(Rd,Rn,Rm,Ra) osvvvv(A64_FMSUB,0,Rd,Rn,Rm,Ra)
# define FMSUBD(Rd,Rn,Rm,Ra) osvvvv(A64_FMSUB,1,Rd,Rn,Rm,Ra)
+/* Vd = (-Va) + (-Vn)*Vm */
+# define FNMADDS(Rd,Rn,Rm,Ra) osvvvv(A64_FNMADD,0,Rd,Rn,Rm,Ra)
+# define FNMADDD(Rd,Rn,Rm,Ra) osvvvv(A64_FNMADD,1,Rd,Rn,Rm,Ra)
+/* Vd = (-Va) + Vn*Vm */
+# define FNMSUBS(Rd,Rn,Rm,Ra) osvvvv(A64_FNMSUB,0,Rd,Rn,Rm,Ra)
+# define FNMSUBD(Rd,Rn,Rm,Ra) osvvvv(A64_FNMSUB,1,Rd,Rn,Rm,Ra)
# define FADDS(Rd,Rn,Rm) osvvv(A64_FADD,0,Rd,Rn,Rm)
# define FADDD(Rd,Rn,Rm) osvvv(A64_FADD,1,Rd,Rn,Rm)
# define FADDV(Rd,Rn,Rm) osvvv(A64_FADD,0,Rd,Rn,Rm)
@@ -118,9 +128,9 @@ static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define negr_f(r0,r1) FNEGS(r0,r1)
# define sqrtr_f(r0,r1) FSQRTS(r0,r1)
# define fmar_f(r0,r1,r2,r3) FMADDS(r0,r1,r2,r3)
-# define fmsr_f(r0,r1,r2,r3) _fmsr_f(_jit,r0,r1,r2,r3)
-static void _fmsr_f(jit_state_t*,
- jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fmsr_f(r0,r1,r2,r3) FNMSUBS(r0,r1,r2,r3)
+# define fnmar_f(r0,r1,r2,r3) FNMADDS(r0,r1,r2,r3)
+# define fnmsr_f(r0,r1,r2,r3) FMSUBS(r0,r1,r2,r3)
# define extr_f(r0,r1) SCVTFS(r0,r1)
# define ldr_f(r0,r1) _ldr_f(_jit,r0,r1)
static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -245,9 +255,9 @@ static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define negr_d(r0,r1) FNEGD(r0,r1)
# define sqrtr_d(r0,r1) FSQRTD(r0,r1)
# define fmar_d(r0,r1,r2,r3) FMADDD(r0,r1,r2,r3)
-# define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3)
-static void _fmsr_d(jit_state_t*,
- jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fmsr_d(r0,r1,r2,r3) FNMSUBD(r0,r1,r2,r3)
+# define fnmar_d(r0,r1,r2,r3) FNMADDD(r0,r1,r2,r3)
+# define fnmsr_d(r0,r1,r2,r3) FMSUBD(r0,r1,r2,r3)
# define extr_d(r0,r1) SCVTFD(r0,r1)
# define ldr_d(r0,r1) _ldr_d(_jit,r0,r1)
static void _ldr_d(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -382,7 +392,7 @@ _osvvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz,
assert(!(Rm & ~0x1f));
assert(!(Ra & ~0x1f));
assert(!(Sz & ~0x3));
- assert(!(Op & ~0xff008000));
+ assert(!(Op & ~0xff208000));
i.w = Op;
i.size.b = Sz;
i.Rd.b = Rd;
@@ -520,15 +530,6 @@ fopi(mul)
fopi(div)
static void
-_fmsr_f(jit_state_t *_jit,
- jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
-{
- /* r0 = r3 + (-r2) * r1 */
- FMSUBS(r0, r1, r2, r3);
- negr_f(r0, r0);
-}
-
-static void
_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
@@ -756,15 +757,6 @@ dopi(mul)
dopi(div)
static void
-_fmsr_d(jit_state_t *_jit,
- jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
-{
- /* r0 = r3 + (-r2) * r1 */
- FMSUBD(r0, r1, r2, r3);
- negr_d(r0, r0);
-}
-
-static void
_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c
index afef726..cd4e79c 100644
--- a/lib/jit_aarch64.c
+++ b/lib/jit_aarch64.c
@@ -1297,6 +1297,12 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrw(name, type) \
case jit_code_##name##i##type: \
name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
@@ -1658,16 +1664,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(ext, _f);
case_rr(ld, _f);
case_rw(ld, _f);
@@ -1763,16 +1763,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rr(ext, _d);
case_rr(ld, _d);
case_rw(ld, _d);
diff --git a/lib/jit_alpha-fpu.c b/lib/jit_alpha-fpu.c
index 66b8e13..9e4d0dc 100644
--- a/lib/jit_alpha-fpu.c
+++ b/lib/jit_alpha-fpu.c
@@ -321,6 +321,12 @@ static void _fmar_f(jit_state_t*,
# define fmsr_f(r0,r1,r2,r3) _fmsr_f(_jit,r0,r1,r2,r3)
static void _fmsr_f(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fnmar_f(r0,r1,r2,r3) _fnmar_f(_jit,r0,r1,r2,r3)
+static void _fnmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fnmsr_f(r0,r1,r2,r3) _fnmsr_f(_jit,r0,r1,r2,r3)
+static void _fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1)
static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define fmar_d(r0,r1,r2,r3) _fmar_d(_jit,r0,r1,r2,r3)
@@ -329,6 +335,12 @@ static void _fmar_d(jit_state_t*,
# define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3)
static void _fmsr_d(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fnmar_d(r0,r1,r2,r3) _fnmar_d(_jit,r0,r1,r2,r3)
+static void _fnmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fnmsr_d(r0,r1,r2,r3) _fnmsr_d(_jit,r0,r1,r2,r3)
+static void _fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define extr_f_d(r0,r1) movr_d(r0,r1)
# define extr_d_f(r0,r1) movr_f(r0,r1)
# define truncr_f_i(r0,r1) truncr_d_i(r0,r1)
@@ -762,6 +774,30 @@ _fmsr_f(jit_state_t *_jit,
}
static void
+_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_f(rn(t0), r1);
+ mulr_f(rn(t0), rn(t0), r2);
+ subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
+_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_f(rn(t0), r1);
+ mulr_f(rn(t0), rn(t0), r2);
+ addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
SQRTT_SU(r1, r0);
@@ -803,6 +839,30 @@ _fmsr_d(jit_state_t *_jit,
}
static void
+_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_d(rn(t0), r1);
+ mulr_d(rn(t0), rn(t0), r2);
+ subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
+_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_d(rn(t0), r1);
+ mulr_d(rn(t0), rn(t0), r2);
+ addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
stxi_l(-8, _FP_REGNO, r1);
diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c
index 91b892a..d2d378f 100644
--- a/lib/jit_alpha.c
+++ b/lib/jit_alpha.c
@@ -911,6 +911,12 @@ _emit_code(jit_state_t *_jit)
name##i##type(rn(node->u.w), rn(node->v.w), \
(jit_float##size##_t *)node->w.n->u.w); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrrr(name, type) \
case jit_code_##name##r##type: \
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
@@ -1256,16 +1262,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(ext, _f);
case_rr(ld, _f);
case_rw(ld, _f);
@@ -1369,16 +1369,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rr(ext, _d);
case_rr(ld, _d);
case_rw(ld, _d);
diff --git a/lib/jit_arm-swf.c b/lib/jit_arm-swf.c
index f63d939..46e4308 100644
--- a/lib/jit_arm-swf.c
+++ b/lib/jit_arm-swf.c
@@ -168,6 +168,18 @@ static void _swf_fmar_d(jit_state_t*,
# define swf_fmsr_d(r0,r1,r2,r3) _swf_fmsr_d(_jit,r0,r1,r2,r3)
static void _swf_fmsr_d(jit_state_t*,
jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define swf_fnmar_f(r0,r1,r2,r3) _swf_fnmar_f(_jit,r0,r1,r2,r3)
+static void _swf_fnmar_f(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define swf_fnmsr_f(r0,r1,r2,r3) _swf_fnmsr_f(_jit,r0,r1,r2,r3)
+static void _swf_fnmsr_f(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define swf_fnmar_d(r0,r1,r2,r3) _swf_fnmar_d(_jit,r0,r1,r2,r3)
+static void _swf_fnmar_d(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define swf_fnmsr_d(r0,r1,r2,r3) _swf_fnmsr_d(_jit,r0,r1,r2,r3)
+static void _swf_fnmsr_d(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
# define swf_addr_f(r0,r1,r2) swf_fff(__addsf3,r0,r1,r2)
# define swf_addi_f(r0,r1,i0) swf_fff_(__addsf3,r0,r1,i0)
# define swf_addr_d(r0,r1,r2) swf_ddd(__adddf3,r0,r1,r2)
@@ -2241,6 +2253,30 @@ _swf_fmsr_f(jit_state_t *_jit,
}
static void
+_swf_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ swf_negr_f(rn(t0), r1);
+ swf_mulr_f(rn(t0), rn(t0), r2);
+ swf_subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
+_swf_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ swf_negr_f(rn(t0), r1);
+ swf_mulr_f(rn(t0), rn(t0), r2);
+ swf_addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
_swf_fmar_d(jit_state_t *_jit,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
@@ -2275,6 +2311,30 @@ _swf_fmsr_d(jit_state_t *_jit,
}
static void
+_swf_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ swf_negr_d(rn(t0), r1);
+ swf_mulr_d(rn(t0), rn(t0), r2);
+ swf_subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
+_swf_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ swf_negr_d(rn(t0), r1);
+ swf_mulr_d(rn(t0), rn(t0), r2);
+ swf_addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
_swf_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
swf_iff(__aeabi_fcmpeq, r0, r1, r2);
diff --git a/lib/jit_arm-vfp.c b/lib/jit_arm-vfp.c
index dfe43e4..adf6a3b 100644
--- a/lib/jit_arm-vfp.c
+++ b/lib/jit_arm-vfp.c
@@ -90,6 +90,8 @@
# define ARM_VCVTR_U32_F64 ARM_VCVT|ARM_VCVT_2I|ARM_V_F64
# define ARM_VFMA 0x0ea00a00
# define ARM_VFMS 0x0ea00a40
+# define ARM_VFNMA 0x0e900a00
+# define ARM_VFNMS 0x0e900a40
# define ARM_V_D 0x00400000
# define ARM_V_N 0x00000080
# define ARM_V_Q 0x00000040
@@ -222,6 +224,14 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
# define VFMS_F32(r0,r1,r2) CC_VFMS_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VFMS_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFMS|ARM_V_F64,r0,r1,r2)
# define VFMS_F64(r0,r1,r2) CC_VFMS_F64(ARM_CC_AL,r0,r1,r2)
+# define CC_VFNMA_F32(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMA,r0,r1,r2)
+# define VFNMA_F32(r0,r1,r2) CC_VFNMA_F32(ARM_CC_AL,r0,r1,r2)
+# define CC_VFNMA_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMA|ARM_V_F64,r0,r1,r2)
+# define VFNMA_F64(r0,r1,r2) CC_VFNMA_F64(ARM_CC_AL,r0,r1,r2)
+# define CC_VFNMS_F32(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMS,r0,r1,r2)
+# define VFNMS_F32(r0,r1,r2) CC_VFNMS_F32(ARM_CC_AL,r0,r1,r2)
+# define CC_VFNMS_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMS|ARM_V_F64,r0,r1,r2)
+# define VFNMS_F64(r0,r1,r2) CC_VFNMS_F64(ARM_CC_AL,r0,r1,r2)
# define CC_VMOV_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
# define VMOV_F32(r0,r1) CC_VMOV_F32(ARM_CC_AL,r0,r1)
# define CC_VMOV_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
@@ -521,6 +531,12 @@ static void _vfp_fmar_f(jit_state_t*,
# define vfp_fmsr_f(r0,r1,r2,r3) _vfp_fmsr_f(_jit,r0,r1,r2,r3)
static void _vfp_fmsr_f(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define vfp_fnmar_f(r0,r1,r2,r3) _vfp_fnmar_f(_jit,r0,r1,r2,r3)
+static void _vfp_fnmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define vfp_fnmsr_f(r0,r1,r2,r3) _vfp_fnmsr_f(_jit,r0,r1,r2,r3)
+static void _vfp_fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_sqrtr_d(r0,r1) VSQRT_F64(r0,r1)
# define vfp_fmar_d(r0,r1,r2,r3) _vfp_fmar_d(_jit,r0,r1,r2,r3)
static void _vfp_fmar_d(jit_state_t*,
@@ -528,6 +544,12 @@ static void _vfp_fmar_d(jit_state_t*,
# define vfp_fmsr_d(r0,r1,r2,r3) _vfp_fmsr_d(_jit,r0,r1,r2,r3)
static void _vfp_fmsr_d(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define vfp_fnmar_d(r0,r1,r2,r3) _vfp_fnmar_d(_jit,r0,r1,r2,r3)
+static void _vfp_fnmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define vfp_fnmsr_d(r0,r1,r2,r3) _vfp_fnmsr_d(_jit,r0,r1,r2,r3)
+static void _vfp_fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_addr_f(r0,r1,r2) VADD_F32(r0,r1,r2)
# define vfp_addi_f(r0,r1,i0) _vfp_addi_f(_jit,r0,r1,i0)
static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
@@ -1490,7 +1512,8 @@ _vfp_fmar_f(jit_state_t *_jit,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
jit_int32_t t0;
- if (jit_armv7r_p()) {
+ /* untested: the jit_cpu.vfp >= 4 path is disabled by the `0 &&` guard */
+ if (0 && jit_cpu.vfp >= 4) {
if (r0 != r2 && r0 != r3) {
vfp_movr_f(r0, r1);
VFMA_F32(r0, r2, r3);
@@ -1507,7 +1530,7 @@ _vfp_fmar_f(jit_state_t *_jit,
if (r0 != r3) {
vfp_mulr_f(r0, r1, r2);
vfp_addr_f(r0, r0, r3);
- }
+ }
else {
t0 = jit_get_reg(jit_class_fpr);
vfp_mulr_f(rn(t0), r1, r2);
@@ -1522,7 +1545,8 @@ _vfp_fmsr_f(jit_state_t *_jit,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
jit_int32_t t0;
- if (jit_armv7r_p()) {
+ /* untested: the jit_cpu.vfp >= 4 path is disabled by the `0 &&` guard */
+ if (0 && jit_cpu.vfp >= 4) {
if (r0 != r2 && r0 != r3) {
vfp_movr_f(r0, r1);
VFMS_F32(r0, r2, r3);
@@ -1551,11 +1575,69 @@ _vfp_fmsr_f(jit_state_t *_jit,
}
static void
+_vfp_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ /* untested: the jit_cpu.vfp >= 4 path is disabled by the `0 &&` guard */
+ if (0 && jit_cpu.vfp >= 4) {
+ if (r0 != r2 && r0 != r3) {
+ vfp_movr_f(r0, r1);
+ VFNMA_F32(r0, r2, r3);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ vfp_movr_f(rn(t0), r1);
+ VFNMA_F32(rn(t0), r2, r3);
+ vfp_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ vfp_negr_f(rn(t0), r1);
+ vfp_mulr_f(rn(t0), rn(t0), r2);
+ vfp_subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_vfp_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ /* untested: the jit_cpu.vfp >= 4 path is disabled by the `0 &&` guard */
+ if (0 && jit_cpu.vfp >= 4) {
+ if (r0 != r2 && r0 != r3) {
+ vfp_movr_f(r0, r1);
+ VFNMS_F32(r0, r2, r3);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ vfp_movr_f(rn(t0), r1);
+ VFNMS_F32(rn(t0), r2, r3);
+ vfp_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ vfp_negr_f(r0, r0);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ vfp_negr_f(rn(t0), r1);
+ vfp_mulr_f(rn(t0), rn(t0), r2);
+ vfp_addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
_vfp_fmar_d(jit_state_t *_jit,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
jit_int32_t t0;
- if (jit_armv7r_p()) {
+ /* untested: the jit_cpu.vfp >= 4 path is disabled by the `0 &&` guard */
+ if (0 && jit_cpu.vfp >= 4) {
if (r0 != r2 && r0 != r3) {
vfp_movr_d(r0, r1);
VFMA_F64(r0, r2, r3);
@@ -1587,7 +1669,8 @@ _vfp_fmsr_d(jit_state_t *_jit,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
jit_int32_t t0;
- if (jit_armv7r_p()) {
+ /* untested: the jit_cpu.vfp >= 4 path is disabled by the `0 &&` guard */
+ if (0 && jit_cpu.vfp >= 4) {
if (r0 != r2 && r0 != r3) {
vfp_movr_d(r0, r1);
VFMS_F64(r0, r2, r3);
@@ -1615,6 +1698,63 @@ _vfp_fmsr_d(jit_state_t *_jit,
}
}
+static void
+_vfp_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ /* untested: the jit_cpu.vfp >= 4 path is disabled by the `0 &&` guard */
+ if (0 && jit_cpu.vfp >= 4) {
+ if (r0 != r2 && r0 != r3) {
+ vfp_movr_d(r0, r1);
+ VFNMA_F64(r0, r2, r3);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ vfp_movr_d(rn(t0), r1);
+ VFNMA_F64(rn(t0), r2, r3);
+ vfp_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ vfp_negr_d(rn(t0), r1);
+ vfp_mulr_d(rn(t0), rn(t0), r2);
+ vfp_subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_vfp_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ /* untested: the jit_cpu.vfp >= 4 path is disabled by the `0 &&` guard */
+ if (0 && jit_cpu.vfp >= 4) {
+ if (r0 != r2 && r0 != r3) {
+ vfp_movr_d(r0, r1);
+ VFNMS_F64(r0, r2, r3);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ vfp_movr_d(rn(t0), r1);
+ VFNMS_F64(rn(t0), r2, r3);
+ vfp_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ vfp_negr_d(r0, r0);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ vfp_negr_d(rn(t0), r1);
+ vfp_mulr_d(rn(t0), rn(t0), r2);
+ vfp_addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
# define fopi(name) \
static void \
_vfp_##name##i_f(jit_state_t *_jit, \
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index 8d39b9a..df6c0e7 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -1342,6 +1342,16 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.w), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ if (jit_swf_p()) \
+ swf_##name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w));\
+ else \
+ vfp_##name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w));\
+ case jit_code_##name##i##type: \
+ break
#define case_rrrr(name, type) \
case jit_code_##name##r##type: \
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
@@ -1791,24 +1801,10 @@ _emit_code(jit_state_t *_jit)
case_vv(abs, _f);
case_vv(neg, _f);
case_vv(sqrt, _f);
- case jit_code_fmar_f:
- if (jit_swf_p())
- swf_fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- else
- vfp_fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- if (jit_swf_p())
- swf_fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- else
- vfp_fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_vv(ext, _f);
case_vv(ld, _f);
case_vw(ld, _f);
@@ -1919,24 +1915,10 @@ _emit_code(jit_state_t *_jit)
case_vv(abs, _d);
case_vv(neg, _d);
case_vv(sqrt, _d);
- case jit_code_fmar_d:
- if (jit_swf_p())
- swf_fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- else
- vfp_fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- if (jit_swf_p())
- swf_fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- else
- vfp_fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_vv(ext, _d);
case_vv(ld, _d);
case_vw(ld, _d);
diff --git a/lib/jit_hppa-fpu.c b/lib/jit_hppa-fpu.c
index c676884..699d31e 100644
--- a/lib/jit_hppa-fpu.c
+++ b/lib/jit_hppa-fpu.c
@@ -277,6 +277,12 @@ static void _fmar_f(jit_state_t*,
#define fmsr_f(r0,r1,r2,r3) _fmsr_f(_jit,r0,r1,r2,r3)
static void _fmsr_f(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define fnmar_f(r0,r1,r2,r3) _fnmar_f(_jit,r0,r1,r2,r3)
+static void _fnmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define fnmsr_f(r0,r1,r2,r3) _fnmsr_f(_jit,r0,r1,r2,r3)
+static void _fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
#define sqrtr_d(r0,r1) FSQRT_D(r1,r0)
#define fmar_d(r0,r1,r2,r3) _fmar_d(_jit,r0,r1,r2,r3)
static void _fmar_d(jit_state_t*,
@@ -284,6 +290,12 @@ static void _fmar_d(jit_state_t*,
#define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3)
static void _fmsr_d(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define fnmar_d(r0,r1,r2,r3) _fnmar_d(_jit,r0,r1,r2,r3)
+static void _fnmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define fnmsr_d(r0,r1,r2,r3) _fnmsr_d(_jit,r0,r1,r2,r3)
+static void _fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
#define extr_f(r0,r1) _extr_f(_jit,r0,r1)
static void _extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
#define extr_d(r0,r1) _extr_d(_jit,r0,r1)
@@ -737,7 +749,6 @@ _fmsr_f(jit_state_t *_jit,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
jit_int32_t reg;
-#if 1
if (r0 != r3) {
mulr_f(r0, r1, r2);
subr_f(r0, r0, r3);
@@ -748,12 +759,30 @@ _fmsr_f(jit_state_t *_jit,
subr_f(r0, rn(reg), r3);
jit_unget_reg(reg);
}
-#else
- reg = jit_get_reg(jit_class_fpr);
- negr_f(rn(reg), r3);
- fmar_f(r0, r1, r2, rn(reg));
- jit_unget_reg(reg);
-#endif
+}
+
+static void
+_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_f(rn(t0), r1);
+ mulr_f(rn(t0), rn(t0), r2);
+ subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
+_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_f(rn(t0), r1);
+ mulr_f(rn(t0), rn(t0), r2);
+ addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
}
static void
@@ -813,7 +842,6 @@ _fmsr_d(jit_state_t *_jit,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
jit_int32_t reg;
-#if 1
if (r0 != r3) {
mulr_d(r0, r1, r2);
subr_d(r0, r0, r3);
@@ -824,12 +852,30 @@ _fmsr_d(jit_state_t *_jit,
subr_d(r0, rn(reg), r3);
jit_unget_reg(reg);
}
-#else
- reg = jit_get_reg(jit_class_fpr);
- negr_d(rn(reg), r3);
- fmar_d(r0, r1, r2, rn(reg));
- jit_unget_reg(reg);
-#endif
+}
+
+static void
+_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_d(rn(t0), r1);
+ mulr_d(rn(t0), rn(t0), r2);
+ subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+}
+
+static void
+_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_d(rn(t0), r1);
+ mulr_d(rn(t0), rn(t0), r2);
+ addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
}
static void
diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c
index b62ceb5..e392c6d 100644
--- a/lib/jit_hppa.c
+++ b/lib/jit_hppa.c
@@ -899,6 +899,12 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.w), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrrr(name, type) \
case jit_code_##name##r##type: \
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
@@ -1259,27 +1265,15 @@ _emit_code(jit_state_t *_jit)
case_rr(neg, _f);
case_rr(neg, _d);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rrr(add, _f);
case_rrf(add, _f, 32);
case_rrr(add, _d);
diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c
index bd026f5..e29bff7 100644
--- a/lib/jit_ia64-fpu.c
+++ b/lib/jit_ia64-fpu.c
@@ -455,10 +455,18 @@ static void _movi_w_d(jit_state_t*, jit_int32_t, jit_word_t);
static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t);
#define fmar_f(r0,r1,r2,r3) FMA_S(r0,r2,r3,r1)
#define fmsr_f(r0,r1,r2,r3) FMS_S(r0,r2,r3,r1)
+#define fnmar_f(r0,r1,r2,r3) FNMA_S(r0,r2,r3,r1)
+#define fnmsr_f(r0,r1,r2,r3) _fnmsr_f(_jit,r0,r1,r2,r3)
+static void _fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
#define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1)
static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t);
#define fmar_d(r0,r1,r2,r3) FMA_D(r0,r2,r3,r1)
#define fmsr_d(r0,r1,r2,r3) FMS_D(r0,r2,r3,r1)
+#define fnmar_d(r0,r1,r2,r3) FNMA_D(r0,r2,r3,r1)
+#define fnmsr_d(r0,r1,r2,r3) _fnmsr_d(_jit,r0,r1,r2,r3)
+static void _fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
#define extr_f_d(r0,r1) FNORM_D(r0,r1)
#define extr_d_f(r0,r1) FNORM_S(r0,r1)
#define extr_f(r0,r1) _extr_f(_jit,r0,r1)
@@ -1565,6 +1573,14 @@ _sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
}
static void
+_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ fmsr_f(r0, r1, r2, r3);
+ negr_f(r0, r0);
+}
+
+static void
_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
movr_d(GR_8, r1);
@@ -1572,6 +1588,14 @@ _sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
MOVF(r0, GR_8);
}
+static void
+_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ fmsr_d(r0, r1, r2, r3);
+ negr_d(r0, r0);
+}
+
static jit_word_t
_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c
index cf3e5b2..d385e8d 100644
--- a/lib/jit_ia64.c
+++ b/lib/jit_ia64.c
@@ -1048,6 +1048,12 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrw(name, type) \
case jit_code_##name##i##type: \
name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
@@ -1409,16 +1415,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(ld, _f);
case_rw(ld, _f);
case_rrr(ldx, _f);
@@ -1514,16 +1514,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rr(ld, _d);
case_rw(ld, _d);
case_rrr(ldx, _d);
diff --git a/lib/jit_loongarch-fpu.c b/lib/jit_loongarch-fpu.c
index 8559791..a3fd02a 100644
--- a/lib/jit_loongarch-fpu.c
+++ b/lib/jit_loongarch-fpu.c
@@ -217,6 +217,8 @@ static void _divi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
# define sqrtr_f(r0, r1) FSQRT_S(r0, r1)
# define fmar_f(r0, r1, r2, r3) FMADD_S(r0, r1, r2, r3)
# define fmsr_f(r0, r1, r2, r3) FMSUB_S(r0, r1, r2, r3)
+# define fnmar_f(r0, r1, r2, r3) FNMADD_S(r0, r1, r2, r3)
+# define fnmsr_f(r0, r1, r2, r3) FNMSUB_S(r0, r1, r2, r3)
# define extr_f(r0, r1) _extr_f(_jit, r0, r1)
static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t);
# define ldr_f(r0, r1) FLD_S(r0, r1, 0)
@@ -379,6 +381,8 @@ static void _divi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
# define sqrtr_d(r0, r1) FSQRT_D(r0, r1)
# define fmar_d(r0, r1, r2, r3) FMADD_D(r0, r1, r2, r3)
# define fmsr_d(r0, r1, r2, r3) FMSUB_D(r0, r1, r2, r3)
+# define fnmar_d(r0, r1, r2, r3) FNMADD_D(r0, r1, r2, r3)
+# define fnmsr_d(r0, r1, r2, r3) FNMSUB_D(r0, r1, r2, r3)
# define extr_d(r0, r1) _extr_d(_jit, r0, r1)
static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t);
# define ldr_d(r0, r1) FLD_D(r0, r1, 0)
diff --git a/lib/jit_loongarch.c b/lib/jit_loongarch.c
index bdf98e5..b892563 100644
--- a/lib/jit_loongarch.c
+++ b/lib/jit_loongarch.c
@@ -968,6 +968,12 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrw(name, type) \
case jit_code_##name##i##type: \
name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
@@ -1339,16 +1345,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(ext, _f);
case_rr(ld, _f);
case_rw(ld, _f);
@@ -1444,16 +1444,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rr(ext, _d);
case_rr(ld, _d);
case_rw(ld, _d);
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 24afbb2..78871a9 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -1447,6 +1447,10 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
case MIPS_MADD_fmt_D:
case MIPS_MSUB_fmt_S:
case MIPS_MSUB_fmt_D:
+ case MIPS_NMADD_fmt_S:
+ case MIPS_NMADD_fmt_D:
+ case MIPS_NMSUB_fmt_S:
+ case MIPS_NMSUB_fmt_D:
assert(!jit_mips6_p());
if (mask & jit_class_gpr)
regs[0] = regs[1] = regs[2] = 0;
diff --git a/lib/jit_mips-fpu.c b/lib/jit_mips-fpu.c
index 9c4b620..ca74956 100644
--- a/lib/jit_mips-fpu.c
+++ b/lib/jit_mips-fpu.c
@@ -150,8 +150,14 @@
# define MADD_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MADD_fmt_D)
# define MSUB_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MSUB_fmt_S)
# define MSUB_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MSUB_fmt_D)
+# define NMADD_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMADD_fmt_S)
+# define NMADD_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMADD_fmt_D)
+# define NMSUB_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMSUB_fmt_S)
+# define NMSUB_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMSUB_fmt_D)
+/* fd = fd + (fs * ft) */
# define MADDF_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MADDF)
# define MADDF_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MADDF)
+/* fd = fd - (fs * ft) */
# define MSUBF_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MSUBF)
# define MSUBF_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MSUBF)
# define LWC1(rt, of, rb) hrri(MIPS_LWC1, rb, rt, of)
@@ -319,9 +325,9 @@ static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define stxi_f(i0, r0, r1) _stxi_f(_jit, i0, r0, r1)
static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define unstr_x(r0, r1, i0) _unstr_x(_jit, r0, r1, i0)
+# define unstr_x(r0, r1, i0) _unstr_x(_jit, r0, r1, i0)
static void _unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#define unsti_x(i0, r0, i1) _unsti_x(_jit, i0, r0, i1)
+# define unsti_x(i0, r0, i1) _unsti_x(_jit, i0, r0, i1)
static void _unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
# define fmar_f(r0, r1, r2, r3) _fmar_f(_jit, r0, r1, r2, r3)
static void _fmar_f(jit_state_t*,
@@ -329,6 +335,12 @@ static void _fmar_f(jit_state_t*,
# define fmsr_f(r0, r1, r2, r3) _fmsr_f(_jit, r0, r1, r2, r3)
static void _fmsr_f(jit_state_t*,
jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define fnmar_f(r0, r1, r2, r3) _fnmar_f(_jit, r0, r1, r2, r3)
+static void _fnmar_f(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define fnmsr_f(r0, r1, r2, r3) _fnmsr_f(_jit, r0, r1, r2, r3)
+static void _fnmsr_f(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
# define movr_f(r0, r1) _movr_f(_jit, r0, r1)
static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
# define movi_f(r0, i0) _movi_f(_jit, r0, i0)
@@ -382,6 +394,12 @@ static void _fmar_d(jit_state_t*,
# define fmsr_d(r0, r1, r2, r3) _fmsr_d(_jit, r0, r1, r2, r3)
static void _fmsr_d(jit_state_t*,
jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define fnmar_d(r0, r1, r2, r3) _fnmar_d(_jit, r0, r1, r2, r3)
+static void _fnmar_d(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define fnmsr_d(r0, r1, r2, r3) _fnmsr_d(_jit, r0, r1, r2, r3)
+static void _fnmsr_d(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
# define movr_d(r0, r1) _movr_d(_jit, r0, r1)
static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define movi_d(r0, i0) _movi_d(_jit, r0, i0)
@@ -961,7 +979,6 @@ _fmsr_f(jit_state_t *_jit,
jit_int32_t t0;
if (jit_mips2_p()) {
if (jit_mips6_p()) {
- /* fd = fd - (fs * ft) */
if (r0 == r3)
MSUBF_S(r0, r2, r1);
else {
@@ -991,6 +1008,65 @@ _fmsr_f(jit_state_t *_jit,
}
static void
+_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ if (jit_mips2_p()) {
+ if (jit_mips6_p()) {
+ if (r0 == r3)
+ MADDF_S(r0, r2, r1);
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_f(rn(t0), r3);
+ MADDF_S(rn(t0), r2, r1);
+ movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ negr_f(r0, r0);
+ }
+ else
+ NMADD_S(r0, r3, r2, r1);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_f(rn(t0), r1);
+ mulr_f(rn(t0), rn(t0), r2);
+ subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ if (jit_mips2_p()) {
+ if (jit_mips6_p()) {
+ if (r0 == r3)
+ MSUBF_S(r0, r2, r1);
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_f(rn(t0), r3);
+ MSUBF_S(rn(t0), r2, r1);
+ movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else
+ NMSUB_S(r0, r3, r2, r1);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_f(rn(t0), r1);
+ mulr_f(rn(t0), rn(t0), r2);
+ addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (r0 != r1)
@@ -1347,7 +1423,6 @@ _fmsr_d(jit_state_t *_jit,
jit_int32_t t0;
if (jit_mips2_p()) {
if (jit_mips6_p()) {
- /* fd = fd - (fs * ft) */
if (r0 == r3)
MSUBF_D(r0, r2, r1);
else {
@@ -1377,6 +1452,65 @@ _fmsr_d(jit_state_t *_jit,
}
static void
+_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ if (jit_mips2_p()) {
+ if (jit_mips6_p()) {
+ if (r0 == r3)
+ MADDF_D(r0, r2, r1);
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_d(rn(t0), r3);
+ MADDF_D(rn(t0), r2, r1);
+ movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ negr_d(r0, r0);
+ }
+ else
+ NMADD_D(r0, r3, r2, r1);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_d(rn(t0), r1);
+ mulr_d(rn(t0), rn(t0), r2);
+ subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0;
+ if (jit_mips2_p()) {
+ if (jit_mips6_p()) {
+ if (r0 == r3)
+ MSUBF_D(r0, r2, r1);
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_d(rn(t0), r3);
+ MSUBF_D(rn(t0), r2, r1);
+ movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else
+ NMSUB_D(r0, r3, r2, r1);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ negr_d(rn(t0), r1);
+ mulr_d(rn(t0), rn(t0), r2);
+ addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (r0 != r1)
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 17fd531..e49307d 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1434,6 +1434,12 @@ _emit_code(jit_state_t *_jit)
name##i##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), node->w.w); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrf(name, type, size) \
case jit_code_##name##i##type: \
assert(node->flag & jit_flag_data); \
@@ -1800,16 +1806,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(ext, _f);
case_rr(ld, _f);
case_rw(ld, _f);
@@ -1905,16 +1905,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rr(ext, _d);
case_rr(ld, _d);
case_rw(ld, _d);
diff --git a/lib/jit_names.c b/lib/jit_names.c
index a6bb023..eba8e30 100644
--- a/lib/jit_names.c
+++ b/lib/jit_names.c
@@ -291,4 +291,8 @@ static char *code_name[] = {
"fmsr_f", "fmsi_f",
"fmar_d", "fmai_d",
"fmsr_d", "fmsi_d",
+ "fnmar_f", "fnmai_f",
+ "fnmsr_f", "fnmsi_f",
+ "fnmar_d", "fnmai_d",
+ "fnmsr_d", "fnmsi_d",
};
diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c
index ab066a8..5366f05 100644
--- a/lib/jit_ppc-fpu.c
+++ b/lib/jit_ppc-fpu.c
@@ -164,6 +164,10 @@ static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define fmar_d(r0,r1,r2,r3) FMADD(r0,r1,r3,r2)
# define fmsr_f(r0,r1,r2,r3) FMSUBS(r0,r1,r3,r2)
# define fmsr_d(r0,r1,r2,r3) FMSUB(r0,r1,r3,r2)
+# define fnmar_f(r0,r1,r2,r3) FNMADDS(r0,r1,r3,r2)
+# define fnmar_d(r0,r1,r2,r3) FNMADD(r0,r1,r3,r2)
+# define fnmsr_f(r0,r1,r2,r3) FNMSUBS(r0,r1,r3,r2)
+# define fnmsr_d(r0,r1,r2,r3) FNMSUB(r0,r1,r3,r2)
# define addr_f(r0,r1,r2) FADDS(r0,r1,r2)
# define addr_d(r0,r1,r2) FADD(r0,r1,r2)
# define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0)
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
index 8653b57..32dda20 100644
--- a/lib/jit_ppc.c
+++ b/lib/jit_ppc.c
@@ -1220,6 +1220,12 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrw(name, type) \
case jit_code_##name##i##type: \
name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
@@ -1583,16 +1589,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rrr(add, _f);
case_rrf(add, _f, 32);
case_rrr(sub, _f);
@@ -1692,16 +1692,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rrr(add, _d);
case_rrf(add, _d, 64);
case_rrr(sub, _d);
diff --git a/lib/jit_riscv-fpu.c b/lib/jit_riscv-fpu.c
index 68ecf36..bfe64fc 100644
--- a/lib/jit_riscv-fpu.c
+++ b/lib/jit_riscv-fpu.c
@@ -129,6 +129,8 @@ static void _divi_f(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float32_t);
# define sqrtr_f(r0, r1) FSQRT_S(r0, r1)
# define fmar_f(r0, r1, r2, r3) FMADD_S(r0, r1, r2, r3)
# define fmsr_f(r0, r1, r2, r3) FMSUB_S(r0, r1, r2, r3)
+# define fnmar_f(r0, r1, r2, r3) FNMADD_S(r0, r1, r2, r3)
+# define fnmsr_f(r0, r1, r2, r3) FNMSUB_S(r0, r1, r2, r3)
# define extr_f(r0, r1) FCVT_S_L(r0, r1)
# define ldr_f(r0, r1) FLW(r0, r1, 0)
# define ldi_f(r0, im) _ldi_f(_jit, r0, im)
@@ -287,6 +289,8 @@ static void _divi_d(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float64_t);
# define sqrtr_d(r0, r1) FSQRT_D(r0, r1)
# define fmar_d(r0, r1, r2, r3) FMADD_D(r0, r1, r2, r3)
# define fmsr_d(r0, r1, r2, r3) FMSUB_D(r0, r1, r2, r3)
+# define fnmar_d(r0, r1, r2, r3) FNMADD_D(r0, r1, r2, r3)
+# define fnmsr_d(r0, r1, r2, r3) FNMSUB_D(r0, r1, r2, r3)
# define extr_d(r0, r1) FCVT_D_L(r0, r1)
# define ldr_d(r0, r1) FLD(r0, r1, 0)
# define ldi_d(r0, im) _ldi_d(_jit, r0, im)
diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c
index a923ef0..2a399a9 100644
--- a/lib/jit_riscv.c
+++ b/lib/jit_riscv.c
@@ -1020,6 +1020,12 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrw(name, type) \
case jit_code_##name##i##type: \
name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
@@ -1402,16 +1408,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(ext, _f);
case_rr(ld, _f);
case_rw(ld, _f);
@@ -1507,16 +1507,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rr(ext, _d);
case_rr(ld, _d);
case_rw(ld, _d);
diff --git a/lib/jit_s390-fpu.c b/lib/jit_s390-fpu.c
index 9d6f350..db848d9 100644
--- a/lib/jit_s390-fpu.c
+++ b/lib/jit_s390-fpu.c
@@ -359,6 +359,12 @@ static void _fmar_f(jit_state_t*,
# define fmsr_f(r0,r1,r2,r3) _fmsr_f(_jit,r0,r1,r2,r3)
static void _fmsr_f(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fnmar_f(r0,r1,r2,r3) _fnmar_f(_jit,r0,r1,r2,r3)
+static void _fnmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fnmsr_f(r0,r1,r2,r3) _fnmsr_f(_jit,r0,r1,r2,r3)
+static void _fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define sqrtr_d(r0,r1) SQDBR(r0,r1)
# define fmar_d(r0,r1,r2,r3) _fmar_d(_jit,r0,r1,r2,r3)
static void _fmar_d(jit_state_t*,
@@ -366,6 +372,12 @@ static void _fmar_d(jit_state_t*,
# define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3)
static void _fmsr_d(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fnmar_d(r0,r1,r2,r3) _fnmar_d(_jit,r0,r1,r2,r3)
+static void _fnmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define fnmsr_d(r0,r1,r2,r3) _fnmsr_d(_jit,r0,r1,r2,r3)
+static void _fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define truncr_f_i(r0,r1) CFEBR(r0,RND_ZERO,r1)
# define truncr_d_i(r0,r1) CFDBR(r0,RND_ZERO,r1)
# if __WORDSIZE == 64
@@ -1012,6 +1024,40 @@ _fmsr_f(jit_state_t* _jit,
}
static void
+_fnmar_f(jit_state_t* _jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ if (r0 == r3) {
+ MAER(r0, r2, r1);
+ negr_f(r0, r0);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_f(rn(t0), r3);
+ MAER(rn(t0), r2, r1);
+ negr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_fnmsr_f(jit_state_t* _jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ if (r0 == r3) {
+ MSER(r0, r2, r1);
+ negr_f(r0, r0);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_f(rn(t0), r3);
+ MSER(rn(t0), r2, r1);
+ negr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+}
+
+static void
_fmar_d(jit_state_t* _jit,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
@@ -1019,9 +1065,9 @@ _fmar_d(jit_state_t* _jit,
MADR(r0, r2, r1);
else {
t0 = jit_get_reg(jit_class_fpr);
- movr_f(rn(t0), r3);
+ movr_d(rn(t0), r3);
MADR(rn(t0), r2, r1);
- movr_f(r0, rn(t0));
+ movr_d(r0, rn(t0));
jit_unget_reg(t0);
}
}
@@ -1034,9 +1080,43 @@ _fmsr_d(jit_state_t* _jit,
MSDR(r0, r2, r1);
else {
t0 = jit_get_reg(jit_class_fpr);
+ movr_d(rn(t0), r3);
+ MSDR(rn(t0), r2, r1);
+ movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_fnmar_d(jit_state_t* _jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ if (r0 == r3) {
+ MADR(r0, r2, r1);
+ negr_d(r0, r0);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
- movr_f(rn(t0), r3);
+ movr_d(rn(t0), r3); /* r3 is a double operand: use the _d move, as _fmar_d/_fmsr_d do */
+ MADR(rn(t0), r2, r1);
+ negr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_fnmsr_d(jit_state_t* _jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ if (r0 == r3) {
+ MSDR(r0, r2, r1);
+ negr_d(r0, r0);
+ }
+ else {
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_d(rn(t0), r3);
MSDR(rn(t0), r2, r1);
- movr_f(r0, rn(t0));
+ negr_d(r0, rn(t0));
jit_unget_reg(t0);
}
}
diff --git a/lib/jit_s390.c b/lib/jit_s390.c
index 0b31a9a..858ea30 100644
--- a/lib/jit_s390.c
+++ b/lib/jit_s390.c
@@ -978,6 +978,12 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrw(name, type) \
case jit_code_##name##i##type: \
name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
@@ -1379,16 +1385,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(ext, _f);
case_rr(ld, _f);
case_rw(ld, _f);
@@ -1488,16 +1488,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rr(ext, _d);
case_rr(ld, _d);
case_rw(ld, _d);
diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c
index 474815d..3cdb870 100644
--- a/lib/jit_sparc-fpu.c
+++ b/lib/jit_sparc-fpu.c
@@ -130,14 +130,6 @@ _f4f(jit_state_t*,jit_int32_t,jit_int32_t,
# define FSQRTS(rs2, rd) FPop1(rd, 0, 41, rs2)
# define FSQRTD(rs2, rd) FPop1(rd, 0, 42, rs2)
# define FSQRTQ(rs2, rd) FPop1(rd, 0, 43, rs2)
-# define FMADDS(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMADDS, rs2)
-# define FMADDD(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMADDD, rs2)
-# define FMSUBS(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMSUBS, rs2)
-# define FMSUBD(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMSUBD, rs2)
-# define FNMSUBS(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMSUBS,rs2)
-# define FNMSUBD(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMSUBD,rs2)
-# define FNMADDS(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMADDS,rs2)
-# define FNMADDD(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMADDD,rs2)
# define SPARC_FADDS 65
# define SPARC_FADDD 66
# define SPARC_FADDQ 67
@@ -160,6 +152,14 @@ _f4f(jit_state_t*,jit_int32_t,jit_int32_t,
# define SPARC_FNMSUBD 10
# define SPARC_FNMADDS 13
# define SPARC_FNMADDD 14
+# define FMADDS(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMADDS, rs2)
+# define FMADDD(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMADDD, rs2)
+# define FMSUBS(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMSUBS, rs2)
+# define FMSUBD(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMSUBD, rs2)
+# define FNMSUBS(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMSUBS,rs2)
+# define FNMSUBD(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMSUBD,rs2)
+# define FNMADDS(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMADDS,rs2)
+# define FNMADDD(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMADDD,rs2)
# define FADDS(rs1, rs2, rd) FPop1(rd, rs1, SPARC_FADDS, rs2)
# define FADDD(rs1, rs2, rd) FPop1(rd, rs1, SPARC_FADDD, rs2)
# define FADDQ(rs1, rs2, rd) FPop1(rd, rs1, SPARC_FADDQ, rs2)
@@ -236,6 +236,10 @@ static void _sqrtr_f(jit_state_t*, jit_int32_t, jit_int32_t);
# define fmsr_f(r0, r1, r2, r3) FMSUBS(r1, r2, r3, r0)
# define fmar_d(r0, r1, r2, r3) FMADDD(r1, r2, r3, r0)
# define fmsr_d(r0, r1, r2, r3) FMSUBD(r1, r2, r3, r0)
+# define fnmar_f(r0, r1, r2, r3) FNMADDS(r1, r2, r3, r0)
+# define fnmsr_f(r0, r1, r2, r3) FNMSUBS(r1, r2, r3, r0)
+# define fnmar_d(r0, r1, r2, r3) FNMADDD(r1, r2, r3, r0)
+# define fnmsr_d(r0, r1, r2, r3) FNMSUBD(r1, r2, r3, r0)
# else
# define fop3f(op, r0, r1, r2, r3) _fop3f(_jit, op, r0, r1, r2, r3)
static void _fop3f(jit_state_t*, jit_int32_t, jit_int32_t,
@@ -244,6 +248,10 @@ static void _fop3f(jit_state_t*, jit_int32_t, jit_int32_t,
# define fmsr_f(r0, r1, r2, r3) fop3f(SPARC_FMSUBS, r0, r1, r2, r3)
# define fmar_d(r0, r1, r2, r3) fop3f(SPARC_FMADDD, r0, r1, r2, r3)
# define fmsr_d(r0, r1, r2, r3) fop3f(SPARC_FMSUBD, r0, r1, r2, r3)
+# define fnmar_f(r0, r1, r2, r3) fop3f(SPARC_FNMADDS, r0, r1, r2, r3)
+# define fnmsr_f(r0, r1, r2, r3) fop3f(SPARC_FNMSUBS, r0, r1, r2, r3)
+# define fnmar_d(r0, r1, r2, r3) fop3f(SPARC_FNMADDD, r0, r1, r2, r3)
+# define fnmsr_d(r0, r1, r2, r3) fop3f(SPARC_FNMSUBD, r0, r1, r2, r3)
# endif
# define extr_d(r0, r1) _extr_d(_jit, r0, r1)
static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t);
diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c
index 5d4ce85..1acf636 100644
--- a/lib/jit_sparc.c
+++ b/lib/jit_sparc.c
@@ -1292,6 +1292,12 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.q.l), \
+ rn(node->v.q.h), rn(node->w.w)); \
+ case jit_code_##name##i##type: \
+ break;
#define case_rrw(name, type) \
case jit_code_##name##i##type: \
name##i##type(rn(node->u.w), \
@@ -1680,16 +1686,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
- case jit_code_fmar_f:
- fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_f:
- break;
- case jit_code_fmsr_f:
- fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_f:
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_rr(ext, _f);
case_rr(ext, _d_f);
case_rrr(lt, _f);
@@ -1789,16 +1789,10 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
- case jit_code_fmar_d:
- fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmai_d:
- break;
- case jit_code_fmsr_d:
- fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- case jit_code_fmsi_d:
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_rr(ext, _d);
case_rr(ext, _f_d);
case_rrr(lt, _d);
diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c
index 8e4422f..930efed 100644
--- a/lib/jit_x86-sse.c
+++ b/lib/jit_x86-sse.c
@@ -178,6 +178,18 @@ static void _sse_fmsr_f(jit_state_t*,
# define sse_fmsr_d(r0, r1, r2, r3) _sse_fmsr_d(_jit, r0, r1, r2, r3)
static void _sse_fmsr_d(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmar_f(r0, r1, r2, r3) _sse_fnmar_f(_jit, r0, r1, r2, r3)
+static void _sse_fnmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmar_d(r0, r1, r2, r3) _sse_fnmar_d(_jit, r0, r1, r2, r3)
+static void _sse_fnmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmsr_f(r0, r1, r2, r3) _sse_fnmsr_f(_jit, r0, r1, r2, r3)
+static void _sse_fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmsr_d(r0, r1, r2, r3) _sse_fnmsr_d(_jit, r0, r1, r2, r3)
+static void _sse_fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
static void
@@ -1001,6 +1013,114 @@ _sse_fmsr_d(jit_state_t *_jit,
}
static void
+_sse_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit single-precision negated multiply-add: r0 = -(r1 * r2) - r3, as the non-FMA sequence below spells out. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* CPU reports FMA support */
+ if (r0 != r2 && r0 != r3) { /* r0 aliases neither r2 nor r3, so -r1 can be built in r0 without losing an input */
+ sse_negr_f(r0, r1);
+ vfmsub213ss(r0, r2, r3); /* presumably r0 = r0 * r2 - r3 (213 operand order) -- confirm against the macro */
+ }
+ else { /* r0 aliases r2 or r3: compute in a scratch register, then move the result into r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ vfmsub213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: negate, multiply and subtract as three separate operations through a scratch register */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ sse_mulr_f(rn(t0), rn(t0), r2);
+ sse_subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit double-precision negated multiply-add: r0 = -(r1 * r2) - r3, as the non-FMA sequence below spells out. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* CPU reports FMA support */
+ if (r0 != r2 && r0 != r3) { /* r0 aliases neither r2 nor r3, so -r1 can be built in r0 without losing an input */
+ sse_negr_d(r0, r1);
+ vfmsub213sd(r0, r2, r3); /* presumably r0 = r0 * r2 - r3 (213 operand order) -- confirm against the macro */
+ }
+ else { /* r0 aliases r2 or r3: compute in a scratch register, then move the result into r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ vfmsub213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: negate, multiply and subtract as three separate operations through a scratch register */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ sse_mulr_d(rn(t0), rn(t0), r2);
+ sse_subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit single-precision negated multiply-subtract: r0 = -(r1 * r2) + r3, as the non-FMA sequence below spells out. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* CPU reports FMA support */
+ if (r0 != r2 && r0 != r3) { /* r0 aliases neither r2 nor r3, so -r1 can be built in r0 without losing an input */
+ sse_negr_f(r0, r1);
+ vfmadd213ss(r0, r2, r3); /* presumably r0 = r0 * r2 + r3 (213 operand order) -- confirm against the macro */
+ }
+ else { /* r0 aliases r2 or r3: compute in a scratch register, then move the result into r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ vfmadd213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: negate, multiply and add as three separate operations through a scratch register */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ sse_mulr_f(rn(t0), rn(t0), r2);
+ sse_addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit double-precision negated multiply-subtract: r0 = -(r1 * r2) + r3, as the non-FMA sequence below spells out. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* CPU reports FMA support */
+ if (r0 != r2 && r0 != r3) { /* r0 aliases neither r2 nor r3, so -r1 can be built in r0 without losing an input */
+ sse_negr_d(r0, r1);
+ vfmadd213sd(r0, r2, r3); /* presumably r0 = r0 * r2 + r3 (213 operand order) -- confirm against the macro */
+ }
+ else { /* r0 aliases r2 or r3: compute in a scratch register, then move the result into r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ vfmadd213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: negate, multiply and add as three separate operations through a scratch register */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ sse_mulr_d(rn(t0), rn(t0), r2);
+ sse_addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c
index a64287e..61040da 100644
--- a/lib/jit_x86-x87.c
+++ b/lib/jit_x86-x87.c
@@ -132,6 +132,18 @@ static void _x87_fmsr_f(jit_state_t*,
# define x87_fmsr_d(r0, r1, r2, r3) _x87_fmsr_d(_jit, r0, r1, r2, r3)
static void _x87_fmsr_d(jit_state_t*,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define x87_fnmar_f(r0, r1, r2, r3) _x87_fnmar_f(_jit, r0, r1, r2, r3)
+static void _x87_fnmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define x87_fnmar_d(r0, r1, r2, r3) _x87_fnmar_d(_jit, r0, r1, r2, r3)
+static void _x87_fnmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define x87_fnmsr_f(r0, r1, r2, r3) _x87_fnmsr_f(_jit, r0, r1, r2, r3)
+static void _x87_fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define x87_fnmsr_d(r0, r1, r2, r3) _x87_fnmsr_d(_jit, r0, r1, r2, r3)
+static void _x87_fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define x87_truncr_f_i(r0, r1) _x87_truncr_d_i(_jit, r0, r1)
# define x87_truncr_d_i(r0, r1) _x87_truncr_d_i(_jit, r0, r1)
static void _x87_truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t);
@@ -773,6 +785,54 @@ _x87_fmsr_d(jit_state_t *_jit,
}
static void
+_x87_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit single-precision r0 = -(r1 * r2) - r3 on x87 as three separate operations. */
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr); /* scratch register: the product is always built here, whatever r0 aliases */
+ x87_negr_f(rn(t0), r1);
+ x87_mulr_f(rn(t0), rn(t0), r2);
+ x87_subr_f(r0, rn(t0), r3); /* r0 is only written by this final step */
+ jit_unget_reg(t0);
+}
+
+static void
+_x87_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit double-precision r0 = -(r1 * r2) - r3 on x87 as three separate operations. */
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr); /* scratch register: the product is always built here, whatever r0 aliases */
+ x87_negr_d(rn(t0), r1);
+ x87_mulr_d(rn(t0), rn(t0), r2);
+ x87_subr_d(r0, rn(t0), r3); /* r0 is only written by this final step */
+ jit_unget_reg(t0);
+}
+
+static void
+_x87_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit single-precision r0 = -(r1 * r2) + r3 on x87 as three separate operations. */
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr); /* scratch register: the product is always built here, whatever r0 aliases */
+ x87_negr_f(rn(t0), r1);
+ x87_mulr_f(rn(t0), rn(t0), r2);
+ x87_addr_f(r0, rn(t0), r3); /* r0 is only written by this final step */
+ jit_unget_reg(t0);
+}
+
+static void
+_x87_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit double-precision r0 = -(r1 * r2) + r3 on x87 as three separate operations. */
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_fpr); /* scratch register: the product is always built here, whatever r0 aliases */
+ x87_negr_d(rn(t0), r1);
+ x87_mulr_d(rn(t0), rn(t0), r2);
+ x87_addr_d(r0, rn(t0), r3); /* r0 is only written by this final step */
+ jit_unget_reg(t0);
+}
+
+static void
_x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
CHECK_CVT_OFFSET();
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index 8f49e6d..fb5f3ca 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -1557,6 +1557,27 @@ _emit_code(jit_state_t *_jit)
name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
rn(node->v.w), rn(node->w.w)); \
break
+#define case_rqr(name, type) /* switch case for a 4-register op: dest u.w, register pair v.q.l/v.q.h, third source w.w */ \
+ case jit_code_##name##r##type: \
+ if (jit_x87_reg_p(node->u.w) && /* x87 emitter is used only when all four operands are x87 registers */ \
+ jit_x87_reg_p(node->v.q.l) && \
+ jit_x87_reg_p(node->v.q.h) && \
+ jit_x87_reg_p(node->w.w)) \
+ x87_##name##r##type(rn(node->u.w), \
+ rn(node->v.q.l), \
+ rn(node->v.q.h), \
+ rn(node->w.w)); \
+ else { \
+ assert(jit_sse_reg_p(node->u.w) && /* otherwise every operand must be an SSE register; mixed sets are rejected */ \
+ jit_sse_reg_p(node->v.q.l) && \
+ jit_sse_reg_p(node->v.q.h) && \
+ jit_sse_reg_p(node->w.w)); \
+ sse_##name##r##type(rn(node->u.w), \
+ rn(node->v.q.l), \
+ rn(node->v.q.h), \
+ rn(node->w.w)); \
+ } \
+ break; /* the macro supplies its own break; the ';' at the use site is an empty statement */
#define case_frr(name, type) \
case jit_code_##name##r##type: \
if (jit_x87_reg_p(node->u.w)) \
@@ -2038,36 +2059,10 @@ _emit_code(jit_state_t *_jit)
case_ff(abs, _f);
case_ff(neg, _f);
case_ff(sqrt, _f);
- case jit_code_fmar_f:
- if (jit_x87_reg_p(node->u.w) &&
- jit_x87_reg_p(node->v.q.l) &&
- jit_x87_reg_p(node->v.q.h) &&
- jit_x87_reg_p(node->w.w))
- x87_fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- else {
- assert(jit_sse_reg_p(node->u.w) &&
- jit_sse_reg_p(node->v.q.l) &&
- jit_sse_reg_p(node->v.q.h) &&
- jit_sse_reg_p(node->w.w));
- sse_fmar_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- }
- break;
- case jit_code_fmsr_f:
- if (jit_x87_reg_p(node->u.w) && jit_x87_reg_p(node->v.q.l) &&
- jit_x87_reg_p(node->v.q.h) && jit_x87_reg_p(node->w.w))
- x87_fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- else {
- assert(jit_sse_reg_p(node->u.w) &&
- jit_sse_reg_p(node->v.q.l) &&
- jit_sse_reg_p(node->v.q.h) &&
- jit_sse_reg_p(node->w.w));
- sse_fmsr_f(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- }
- break;
+ case_rqr(fma, _f);
+ case_rqr(fms, _f);
+ case_rqr(fnma, _f);
+ case_rqr(fnms, _f);
case_fr(ext, _f);
case_fr(ext, _d_f);
case_rff(lt, _f);
@@ -2191,36 +2186,10 @@ _emit_code(jit_state_t *_jit)
case_ff(abs, _d);
case_ff(neg, _d);
case_ff(sqrt, _d);
- case jit_code_fmar_d:
- if (jit_x87_reg_p(node->u.w) &&
- jit_x87_reg_p(node->v.q.l) &&
- jit_x87_reg_p(node->v.q.h) &&
- jit_x87_reg_p(node->w.w))
- x87_fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- else {
- assert(jit_sse_reg_p(node->u.w) &&
- jit_sse_reg_p(node->v.q.l) &&
- jit_sse_reg_p(node->v.q.h) &&
- jit_sse_reg_p(node->w.w));
- sse_fmar_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- }
- break;
- case jit_code_fmsr_d:
- if (jit_x87_reg_p(node->u.w) && jit_x87_reg_p(node->v.q.l) &&
- jit_x87_reg_p(node->v.q.h) && jit_x87_reg_p(node->w.w))
- x87_fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- else {
- assert(jit_sse_reg_p(node->u.w) &&
- jit_sse_reg_p(node->v.q.l) &&
- jit_sse_reg_p(node->v.q.h) &&
- jit_sse_reg_p(node->w.w));
- sse_fmsr_d(rn(node->u.w), rn(node->v.q.l),
- rn(node->v.q.h), rn(node->w.w));
- }
- break;
+ case_rqr(fma, _d);
+ case_rqr(fms, _d);
+ case_rqr(fnma, _d);
+ case_rqr(fnms, _d);
case_fr(ext, _d);
case_fr(ext, _f_d);
case_rff(lt, _d);
@@ -2568,6 +2537,8 @@ _emit_code(jit_state_t *_jit)
case jit_code_absi_d: case jit_code_sqrti_d:
case jit_code_fmai_f: case jit_code_fmsi_f:
case jit_code_fmai_d: case jit_code_fmsi_d:
+ case jit_code_fnmai_f: case jit_code_fnmsi_f:
+ case jit_code_fnmai_d: case jit_code_fnmsi_d:
break;
case jit_code_retval_f:
#if __X32
diff --git a/lib/lightning.c b/lib/lightning.c
index 19163ba..95502e5 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -1714,14 +1714,18 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
break;
case jit_code_fmar_f: case jit_code_fmar_d:
case jit_code_fmsr_f: case jit_code_fmsr_d:
+ case jit_code_fnmar_f: case jit_code_fnmar_d:
+ case jit_code_fnmsr_f: case jit_code_fnmsr_d:
mask = jit_cc_a0_reg|jit_cc_a0_chg|
jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_reg;
break;
case jit_code_fmai_f: case jit_code_fmsi_f:
+ case jit_code_fnmai_f: case jit_code_fnmsi_f:
mask = jit_cc_a0_reg|jit_cc_a0_chg|
jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_flt;
break;
case jit_code_fmai_d: case jit_code_fmsi_d:
+ case jit_code_fnmai_d: case jit_code_fnmsi_d:
mask = jit_cc_a0_reg|jit_cc_a0_chg|
jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl;
break;
@@ -4607,6 +4611,44 @@ _jit_fmsi_f(jit_state_t *_jit,
}
void
+_jit_fnmai_f(jit_state_t *_jit,
+ jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x)
+{ /* Immediate form of fnmar_f: load the float constant x into a register, then emit fnmar_f(u, v, w, <x>). */
+ jit_int32_t y;
+ jit_inc_synth_wqf(fnmai_f, u, v, w, x); /* open the synthesized node under its own code (was mistakenly fmai_f) */
+ if (u != v && u != w) { /* u is not one of the multiplicands, so it can carry the constant itself */
+ jit_movi_f(u, x);
+ jit_fnmar_f(u, v, w, u);
+ }
+ else { /* loading x into u would overwrite v or w: use a scratch FPR for the constant */
+ y = jit_get_reg(jit_class_fpr);
+ jit_movi_f(y, x);
+ jit_fnmar_f(u, v, w, y);
+ jit_unget_reg(y);
+ }
+ jit_dec_synth();
+}
+
+void
+_jit_fnmsi_f(jit_state_t *_jit,
+ jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x)
+{ /* Immediate form of fnmsr_f: load the float constant x into a register, then emit fnmsr_f(u, v, w, <x>). */
+ jit_int32_t y;
+ jit_inc_synth_wqf(fnmsi_f, u, v, w, x); /* open the synthesized node under its own code (was mistakenly fmai_f) */
+ if (u != v && u != w) { /* u is not one of the multiplicands, so it can carry the constant itself */
+ jit_movi_f(u, x);
+ jit_fnmsr_f(u, v, w, u);
+ }
+ else { /* loading x into u would overwrite v or w: use a scratch FPR for the constant */
+ y = jit_get_reg(jit_class_fpr);
+ jit_movi_f(y, x);
+ jit_fnmsr_f(u, v, w, y);
+ jit_unget_reg(y);
+ }
+ jit_dec_synth();
+}
+
+void
_jit_negi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v)
{
jit_inc_synth_wd(negi_d, u, v);
@@ -4671,6 +4713,44 @@ _jit_fmsi_d(jit_state_t *_jit,
jit_dec_synth();
}
+void
+_jit_fnmai_d(jit_state_t *_jit,
+ jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x)
+{ /* Immediate form of fnmar_d: load the double constant x into a register, then emit fnmar_d(u, v, w, <x>). */
+ jit_int32_t y;
+ jit_inc_synth_wqd(fnmai_d, u, v, w, x); /* open the synthesized node under its own code (was mistakenly fmai_d) */
+ if (u != v && u != w) { /* u is not one of the multiplicands, so it can carry the constant itself */
+ jit_movi_d(u, x);
+ jit_fnmar_d(u, v, w, u);
+ }
+ else { /* loading x into u would overwrite v or w: use a scratch FPR for the constant */
+ y = jit_get_reg(jit_class_fpr);
+ jit_movi_d(y, x);
+ jit_fnmar_d(u, v, w, y);
+ jit_unget_reg(y);
+ }
+ jit_dec_synth();
+}
+
+void
+_jit_fnmsi_d(jit_state_t *_jit,
+ jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x)
+{ /* Immediate form of fnmsr_d: load the double constant x into a register, then emit fnmsr_d(u, v, w, <x>). */
+ jit_int32_t y;
+ jit_inc_synth_wqd(fnmsi_d, u, v, w, x); /* open the synthesized node under its own code (was mistakenly fmai_d) */
+ if (u != v && u != w) { /* u is not one of the multiplicands, so it can carry the constant itself */
+ jit_movi_d(u, x);
+ jit_fnmsr_d(u, v, w, u);
+ }
+ else { /* loading x into u would overwrite v or w: use a scratch FPR for the constant */
+ y = jit_get_reg(jit_class_fpr);
+ jit_movi_d(y, x);
+ jit_fnmsr_d(u, v, w, y);
+ jit_unget_reg(y);
+ }
+ jit_dec_synth();
+}
+
static void
_cloi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
generated by cgit v1.2.3 (git 2.46.0) at 2025年10月03日 23:52:04 +0000

AltStyle によって変換されたページ (->オリジナル) /