-rw-r--r-- | check/fma.tst | 206 | ||||
-rw-r--r-- | check/lightning.c | 12 | ||||
-rw-r--r-- | include/lightning.h.in | 21 | ||||
-rw-r--r-- | lib/jit_aarch64-fpu.c | 42 | ||||
-rw-r--r-- | lib/jit_aarch64.c | 34 | ||||
-rw-r--r-- | lib/jit_alpha-fpu.c | 60 | ||||
-rw-r--r-- | lib/jit_alpha.c | 34 | ||||
-rw-r--r-- | lib/jit_arm-swf.c | 60 | ||||
-rw-r--r-- | lib/jit_arm-vfp.c | 150 | ||||
-rw-r--r-- | lib/jit_arm.c | 54 | ||||
-rw-r--r-- | lib/jit_hppa-fpu.c | 74 | ||||
-rw-r--r-- | lib/jit_hppa.c | 34 | ||||
-rw-r--r-- | lib/jit_ia64-fpu.c | 24 | ||||
-rw-r--r-- | lib/jit_ia64.c | 34 | ||||
-rw-r--r-- | lib/jit_loongarch-fpu.c | 4 | ||||
-rw-r--r-- | lib/jit_loongarch.c | 34 | ||||
-rw-r--r-- | lib/jit_mips-cpu.c | 4 | ||||
-rw-r--r-- | lib/jit_mips-fpu.c | 142 | ||||
-rw-r--r-- | lib/jit_mips.c | 34 | ||||
-rw-r--r-- | lib/jit_names.c | 4 | ||||
-rw-r--r-- | lib/jit_ppc-fpu.c | 4 | ||||
-rw-r--r-- | lib/jit_ppc.c | 34 | ||||
-rw-r--r-- | lib/jit_riscv-fpu.c | 4 | ||||
-rw-r--r-- | lib/jit_riscv.c | 34 | ||||
-rw-r--r-- | lib/jit_s390-fpu.c | 86 | ||||
-rw-r--r-- | lib/jit_s390.c | 34 | ||||
-rw-r--r-- | lib/jit_sparc-fpu.c | 24 | ||||
-rw-r--r-- | lib/jit_sparc.c | 34 | ||||
-rw-r--r-- | lib/jit_x86-sse.c | 120 | ||||
-rw-r--r-- | lib/jit_x86-x87.c | 60 | ||||
-rw-r--r-- | lib/jit_x86.c | 91 | ||||
-rw-r--r-- | lib/lightning.c | 80 |
diff --git a/check/fma.tst b/check/fma.tst index 056511f..f87f709 100644 --- a/check/fma.tst +++ b/check/fma.tst @@ -211,6 +211,212 @@ fsi2: calli @abort dsi2: + /* Simple encoding test for all different registers */ + movi_f %f0 0.0 + movi_f %f1 2.0 + movi_f %f2 3.0 + movi_f %f3 4.0 + fnmar_f %f0 %f1 %f2 %f3 + beqi_f fna0 %f0 -10.0 + calli @abort +fna0: + movi_d %f0 0.0 + movi_d %f1 4.0 + movi_d %f2 5.0 + movi_d %f3 6.0 + fnmar_d %f0 %f1 %f2 %f3 + beqi_d dna0 %f0 -26.0 + calli @abort +dna0: + movi_f %f0 0.0 + movi_f %f1 2.0 + movi_f %f2 3.0 + movi_f %f3 4.0 + fnmsr_f %f0 %f1 %f2 %f3 + beqi_f fns0 %f0 -2.0 + calli @abort +fns0: + movi_d %f0 0.0 + movi_d %f1 4.0 + movi_d %f2 5.0 + movi_d %f3 6.0 + fnmsr_d %f0 %f1 %f2 %f3 + beqi_d dns0 %f0 -14.0 + calli @abort +dns0: + + /* Simple encoding test for result also first argument */ + movi_f %f1 2.0 + movi_f %f2 3.0 + movi_f %f3 4.0 + fnmar_f %f1 %f1 %f2 %f3 + beqi_f fna1 %f1 -10.0 + calli @abort +fna1: + movi_d %f1 4.0 + movi_d %f2 5.0 + movi_d %f3 6.0 + fnmar_d %f1 %f1 %f2 %f3 + beqi_d dna1 %f1 -26.0 + calli @abort +dna1: + movi_f %f1 2.0 + movi_f %f2 3.0 + movi_f %f3 4.0 + fnmsr_f %f1 %f1 %f2 %f3 + beqi_f fns1 %f1 -2.0 + calli @abort +fns1: + movi_d %f1 4.0 + movi_d %f2 5.0 + movi_d %f3 6.0 + fnmsr_d %f1 %f1 %f2 %f3 + beqi_d dns1 %f1 -14.0 + calli @abort +dns1: + + /* Simple encoding test for result also second argument */ + movi_f %f1 2.0 + movi_f %f2 3.0 + movi_f %f3 4.0 + fnmar_f %f2 %f1 %f2 %f3 + beqi_f fna2 %f2 -10.0 + calli @abort +fna2: + movi_d %f1 4.0 + movi_d %f2 5.0 + movi_d %f3 6.0 + fnmar_d %f2 %f1 %f2 %f3 + beqi_d dna2 %f2 -26.0 + calli @abort +dna2: + movi_f %f1 2.0 + movi_f %f2 3.0 + movi_f %f3 4.0 + fnmsr_f %f2 %f1 %f2 %f3 + beqi_f fns2 %f2 -2.0 + calli @abort +fns2: + movi_d %f1 4.0 + movi_d %f2 5.0 + movi_d %f3 6.0 + fnmsr_d %f2 %f1 %f2 %f3 + beqi_d dns2 %f2 -14.0 + calli @abort +dns2: + + /* Simple encoding test for result also third argument */ + movi_f %f1 2.0 + movi_f %f2 3.0 + movi_f %f3 4.0 + 
fnmsr_f %f3 %f1 %f2 %f3 + beqi_f fns3 %f3 -2.0 + calli @abort +fns3: + movi_d %f1 4.0 + movi_d %f2 5.0 + movi_d %f3 6.0 + fnmsr_d %f3 %f1 %f2 %f3 + beqi_d dns3 %f3 -14.0 + calli @abort +dns3: + movi_f %f1 2.0 + movi_f %f2 3.0 + movi_f %f3 4.0 + fnmar_f %f3 %f1 %f2 %f3 + beqi_f fna3 %f3 -10.0 + calli @abort +fna3: + movi_d %f1 4.0 + movi_d %f2 5.0 + movi_d %f3 6.0 + fnmar_d %f3 %f1 %f2 %f3 + beqi_d dna3 %f3 -26.0 + calli @abort +dna3: + + /* Simple encoding test for all different registers */ + movi_f %f0 0.0 + movi_f %f1 2.0 + movi_f %f2 3.0 + fnmai_f %f0 %f1 %f2 4.0 + beqi_f fnai0 %f0 -10.0 + calli @abort +fnai0: + movi_d %f0 0.0 + movi_d %f1 4.0 + movi_d %f2 5.0 + fnmai_d %f0 %f1 %f2 6.0 + beqi_d dnai0 %f0 -26.0 + calli @abort +dnai0: + movi_f %f0 0.0 + movi_f %f1 2.0 + movi_f %f2 3.0 + fnmsi_f %f0 %f1 %f2 4.0 + beqi_f fnsi0 %f0 -2.0 + calli @abort +fnsi0: + movi_d %f0 0.0 + movi_d %f1 4.0 + movi_d %f2 5.0 + fnmsi_d %f0 %f1 %f2 6.0 + beqi_d dnsi0 %f0 -14.0 + calli @abort +dnsi0: + + /* Simple encoding test for result also first argument */ + movi_f %f1 2.0 + movi_f %f2 3.0 + fnmai_f %f1 %f1 %f2 4.0 + beqi_f fnai1 %f1 -10.0 + calli @abort +fnai1: + movi_d %f1 4.0 + movi_d %f2 5.0 + fnmai_d %f1 %f1 %f2 6.0 + beqi_d dnai1 %f1 -26.0 + calli @abort +dnai1: + movi_f %f1 2.0 + movi_f %f2 3.0 + fnmsi_f %f1 %f1 %f2 4.0 + beqi_f fnsi1 %f1 -2.0 + calli @abort +fnsi1: + movi_d %f1 4.0 + movi_d %f2 5.0 + fnmsi_d %f1 %f1 %f2 6.0 + beqi_d dnsi1 %f1 -14.0 + calli @abort +dnsi1: + + /* Simple encoding test for result also second argument */ + movi_f %f1 2.0 + movi_f %f2 3.0 + fnmai_f %f2 %f1 %f2 4.0 + beqi_f fnai2 %f2 -10.0 + calli @abort +fnai2: + movi_d %f1 4.0 + movi_d %f2 5.0 + fnmai_d %f2 %f1 %f2 6.0 + beqi_d dnai2 %f2 -26.0 + calli @abort +dnai2: + movi_f %f1 2.0 + movi_f %f2 3.0 + fnmsi_f %f2 %f1 %f2 4.0 + beqi_f fnsi2 %f2 -2.0 + calli @abort +fnsi2: + movi_d %f1 4.0 + movi_d %f2 5.0 + fnmsi_d %f2 %f1 %f2 6.0 + beqi_d dnsi2 %f2 -14.0 + calli @abort +dnsi2: + prepare 
pushargi ok finishi @puts diff --git a/check/lightning.c b/check/lightning.c index ea5b270..f04ecd8 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -476,6 +476,8 @@ static void absr_f(void); static void absi_f(void); static void sqrtr_f(void); static void sqrti_f(void); static void fmar_f(void); static void fmai_f(void); static void fmsr_f(void); static void fmsi_f(void); +static void fnmar_f(void); static void fnmai_f(void); +static void fnmsr_f(void); static void fnmsi_f(void); static void ltr_f(void); static void lti_f(void); static void ler_f(void); static void lei_f(void); static void eqr_f(void); static void eqi_f(void); @@ -535,6 +537,8 @@ static void absr_d(void); static void absi_d(void); static void sqrtr_d(void); static void sqrti_d(void); static void fmar_d(void); static void fmai_d(void); static void fmsr_d(void); static void fmsi_d(void); +static void fnmar_d(void); static void fnmai_d(void); +static void fnmsr_d(void); static void fnmsi_d(void); static void ltr_d(void); static void lti_d(void); static void ler_d(void); static void lei_d(void); static void eqr_d(void); static void eqi_d(void); @@ -875,6 +879,8 @@ static instr_t instr_vector[] = { entry(sqrtr_f), entry(sqrti_f), entry(fmar_f), entry(fmai_f), entry(fmsr_f), entry(fmsi_f), + entry(fnmar_f), entry(fnmai_f), + entry(fnmsr_f), entry(fnmsi_f), entry(ltr_f), entry(lti_f), entry(ler_f), entry(lei_f), entry(eqr_f), entry(eqi_f), @@ -933,6 +939,8 @@ static instr_t instr_vector[] = { entry(sqrtr_d), entry(sqrti_d), entry(fmar_d), entry(fmai_d), entry(fmsr_d), entry(fmsi_d), + entry(fnmar_d), entry(fnmai_d), + entry(fnmsr_d), entry(fnmsi_d), entry(ltr_d), entry(lti_d), entry(ler_d), entry(lei_d), entry(eqr_d), entry(eqi_d), @@ -1933,6 +1941,8 @@ entry_fr_fr(absr_f) entry_fr_fm(absi_f) entry_fr_fr(sqrtr_f) entry_fr_fm(sqrti_f) entry_fr_fr_fr_fr(fmar_f) entry_fr_fr_fr_fm(fmai_f) entry_fr_fr_fr_fr(fmsr_f) entry_fr_fr_fr_fm(fmsi_f) +entry_fr_fr_fr_fr(fnmar_f) entry_fr_fr_fr_fm(fnmai_f) 
+entry_fr_fr_fr_fr(fnmsr_f) entry_fr_fr_fr_fm(fnmsi_f) entry_ir_fr_fr(ltr_f) entry_ir_fr_fm(lti_f) entry_ir_fr_fr(ler_f) entry_ir_fr_fm(lei_f) entry_ir_fr_fr(eqr_f) entry_ir_fr_fm(eqi_f) @@ -1991,6 +2001,8 @@ entry_fr_fr(absr_d) entry_fr_fm(absi_d) entry_fr_fr(sqrtr_d) entry_fr_fm(sqrti_d) entry_fr_fr_fr_fr(fmar_d) entry_fr_fr_fr_dm(fmai_d) entry_fr_fr_fr_fr(fmsr_d) entry_fr_fr_fr_dm(fmsi_d) +entry_fr_fr_fr_fr(fnmar_d) entry_fr_fr_fr_dm(fnmai_d) +entry_fr_fr_fr_fr(fnmsr_d) entry_fr_fr_fr_dm(fnmsi_d) entry_ir_fr_fr(ltr_d) entry_ir_fr_dm(lti_d) entry_ir_fr_fr(ler_d) entry_ir_fr_dm(lei_d) entry_ir_fr_fr(eqr_d) entry_ir_fr_dm(eqi_d) diff --git a/include/lightning.h.in b/include/lightning.h.in index 41b96e2..3c049ed 100644 --- a/include/lightning.h.in +++ b/include/lightning.h.in @@ -1197,6 +1197,19 @@ typedef enum { #define jit_fmsi_d(u,v,w,x) _jit_fmsi_d(_jit, u, v, w, x) jit_code_fmsr_d, jit_code_fmsi_d, +#define jit_fnmar_f(u,v,w,x) jit_new_node_wqw(jit_code_fnmar_f, u, v, w, x) +#define jit_fnmai_f(u,v,w,x) _jit_fnmai_f(_jit, u, v, w, x) + jit_code_fnmar_f, jit_code_fnmai_f, +#define jit_fnmsr_f(u,v,w,x) jit_new_node_wqw(jit_code_fnmsr_f, u, v, w, x) +#define jit_fnmsi_f(u,v,w,x) _jit_fnmsi_f(_jit, u, v, w, x) + jit_code_fnmsr_f, jit_code_fnmsi_f, +#define jit_fnmar_d(u,v,w,x) jit_new_node_wqw(jit_code_fnmar_d, u, v, w, x) +#define jit_fnmai_d(u,v,w,x) _jit_fnmai_d(_jit, u, v, w, x) + jit_code_fnmar_d, jit_code_fnmai_d, +#define jit_fnmsr_d(u,v,w,x) jit_new_node_wqw(jit_code_fnmsr_d, u, v, w, x) +#define jit_fnmsi_d(u,v,w,x) _jit_fnmsi_d(_jit, u, v, w, x) + jit_code_fnmsr_d, jit_code_fnmsi_d, + jit_code_last_code } jit_code_t; @@ -1324,6 +1337,10 @@ extern void _jit_fmai_f(jit_state_t*, jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); extern void _jit_fmsi_f(jit_state_t*, jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); +extern void _jit_fnmai_f(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); +extern void _jit_fnmsi_f(jit_state_t*, + jit_fpr_t, 
jit_fpr_t, jit_fpr_t, jit_float32_t); extern jit_node_t *_jit_arg_d(jit_state_t*); extern void _jit_getarg_d(jit_state_t*, jit_fpr_t, jit_node_t*); @@ -1341,6 +1358,10 @@ extern void _jit_fmai_d(jit_state_t*, jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); extern void _jit_fmsi_d(jit_state_t*, jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); +extern void _jit_fnmai_d(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); +extern void _jit_fnmsi_d(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); #define jit_get_reg(s) _jit_get_reg(_jit,s) extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t); diff --git a/lib/jit_aarch64-fpu.c b/lib/jit_aarch64-fpu.c index 8ca39cf..51f40ce 100644 --- a/lib/jit_aarch64-fpu.c +++ b/lib/jit_aarch64-fpu.c @@ -33,6 +33,8 @@ # define A64_FSQRT 0x1e21c000 # define A64_FMADD 0x1f000000 # define A64_FMSUB 0x1f008000 +# define A64_FNMADD 0x1f200000 +# define A64_FNMSUB 0x1f208000 # define A64_FCVTS 0x1e224000 # define A64_FCVTD 0x1e22c000 # define A64_FMUL 0x1e200800 @@ -63,10 +65,18 @@ # define FNEGD(Rd,Rn) osvv_(A64_FNEG,1,Rd,Rn) # define FSQRTS(Rd,Rn) osvv_(A64_FSQRT,0,Rd,Rn) # define FSQRTD(Rd,Rn) osvv_(A64_FSQRT,1,Rd,Rn) +/* Vd = Va + Vn*Vm */ # define FMADDS(Rd,Rn,Rm,Ra) osvvvv(A64_FMADD,0,Rd,Rn,Rm,Ra) # define FMADDD(Rd,Rn,Rm,Ra) osvvvv(A64_FMADD,1,Rd,Rn,Rm,Ra) +/* Vd = Va + (-Vn)*Vm */ # define FMSUBS(Rd,Rn,Rm,Ra) osvvvv(A64_FMSUB,0,Rd,Rn,Rm,Ra) # define FMSUBD(Rd,Rn,Rm,Ra) osvvvv(A64_FMSUB,1,Rd,Rn,Rm,Ra) +/* Vd = (-Va) + (-Vn)*Vm */ +# define FNMADDS(Rd,Rn,Rm,Ra) osvvvv(A64_FNMADD,0,Rd,Rn,Rm,Ra) +# define FNMADDD(Rd,Rn,Rm,Ra) osvvvv(A64_FNMADD,1,Rd,Rn,Rm,Ra) +/* Vd = (-Va) + Vn*Vm */ +# define FNMSUBS(Rd,Rn,Rm,Ra) osvvvv(A64_FNMSUB,0,Rd,Rn,Rm,Ra) +# define FNMSUBD(Rd,Rn,Rm,Ra) osvvvv(A64_FNMSUB,1,Rd,Rn,Rm,Ra) # define FADDS(Rd,Rn,Rm) osvvv(A64_FADD,0,Rd,Rn,Rm) # define FADDD(Rd,Rn,Rm) osvvv(A64_FADD,1,Rd,Rn,Rm) # define FADDV(Rd,Rn,Rm) osvvv(A64_FADD,0,Rd,Rn,Rm) @@ -118,9 +128,9 @@ static void 
_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); # define negr_f(r0,r1) FNEGS(r0,r1) # define sqrtr_f(r0,r1) FSQRTS(r0,r1) # define fmar_f(r0,r1,r2,r3) FMADDS(r0,r1,r2,r3) -# define fmsr_f(r0,r1,r2,r3) _fmsr_f(_jit,r0,r1,r2,r3) -static void _fmsr_f(jit_state_t*, - jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fmsr_f(r0,r1,r2,r3) FNMSUBS(r0,r1,r2,r3) +# define fnmar_f(r0,r1,r2,r3) FNMADDS(r0,r1,r2,r3) +# define fnmsr_f(r0,r1,r2,r3) FMSUBS(r0,r1,r2,r3) # define extr_f(r0,r1) SCVTFS(r0,r1) # define ldr_f(r0,r1) _ldr_f(_jit,r0,r1) static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t); @@ -245,9 +255,9 @@ static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define negr_d(r0,r1) FNEGD(r0,r1) # define sqrtr_d(r0,r1) FSQRTD(r0,r1) # define fmar_d(r0,r1,r2,r3) FMADDD(r0,r1,r2,r3) -# define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3) -static void _fmsr_d(jit_state_t*, - jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fmsr_d(r0,r1,r2,r3) FNMSUBD(r0,r1,r2,r3) +# define fnmar_d(r0,r1,r2,r3) FNMADDD(r0,r1,r2,r3) +# define fnmsr_d(r0,r1,r2,r3) FMSUBD(r0,r1,r2,r3) # define extr_d(r0,r1) SCVTFD(r0,r1) # define ldr_d(r0,r1) _ldr_d(_jit,r0,r1) static void _ldr_d(jit_state_t*,jit_int32_t,jit_int32_t); @@ -382,7 +392,7 @@ _osvvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz, assert(!(Rm & ~0x1f)); assert(!(Ra & ~0x1f)); assert(!(Sz & ~0x3)); - assert(!(Op & ~0xff008000)); + assert(!(Op & ~0xff208000)); i.w = Op; i.size.b = Sz; i.Rd.b = Rd; @@ -520,15 +530,6 @@ fopi(mul) fopi(div) static void -_fmsr_f(jit_state_t *_jit, - jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) -{ - /* r0 = r3 + (-r2) * r1 */ - FMSUBS(r0, r1, r2, r3); - negr_f(r0, r0); -} - -static void _ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; @@ -756,15 +757,6 @@ dopi(mul) dopi(div) static void -_fmsr_d(jit_state_t *_jit, - jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) -{ - /* r0 = r3 + 
(-r2) * r1 */ - FMSUBD(r0, r1, r2, r3); - negr_d(r0, r0); -} - -static void _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index afef726..cd4e79c 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -1297,6 +1297,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ @@ -1658,16 +1664,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(ext, _f); case_rr(ld, _f); case_rw(ld, _f); @@ -1763,16 +1763,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ext, _d); case_rr(ld, _d); case_rw(ld, _d); diff --git a/lib/jit_alpha-fpu.c b/lib/jit_alpha-fpu.c index 66b8e13..9e4d0dc 100644 --- a/lib/jit_alpha-fpu.c +++ b/lib/jit_alpha-fpu.c @@ -321,6 +321,12 @@ static void _fmar_f(jit_state_t*, # define fmsr_f(r0,r1,r2,r3) 
_fmsr_f(_jit,r0,r1,r2,r3) static void _fmsr_f(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fnmar_f(r0,r1,r2,r3) _fnmar_f(_jit,r0,r1,r2,r3) +static void _fnmar_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fnmsr_f(r0,r1,r2,r3) _fnmsr_f(_jit,r0,r1,r2,r3) +static void _fnmsr_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1) static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t); # define fmar_d(r0,r1,r2,r3) _fmar_d(_jit,r0,r1,r2,r3) @@ -329,6 +335,12 @@ static void _fmar_d(jit_state_t*, # define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3) static void _fmsr_d(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fnmar_d(r0,r1,r2,r3) _fnmar_d(_jit,r0,r1,r2,r3) +static void _fnmar_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fnmsr_d(r0,r1,r2,r3) _fnmsr_d(_jit,r0,r1,r2,r3) +static void _fnmsr_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define extr_f_d(r0,r1) movr_d(r0,r1) # define extr_d_f(r0,r1) movr_f(r0,r1) # define truncr_f_i(r0,r1) truncr_d_i(r0,r1) @@ -762,6 +774,30 @@ _fmsr_f(jit_state_t *_jit, } static void +_fnmar_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + negr_f(rn(t0), r1); + mulr_f(rn(t0), rn(t0), r2); + subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_fnmsr_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + negr_f(rn(t0), r1); + mulr_f(rn(t0), rn(t0), r2); + addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void _sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { SQRTT_SU(r1, r0); @@ -803,6 +839,30 @@ _fmsr_d(jit_state_t *_jit, } static void +_fnmar_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, 
jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + negr_d(rn(t0), r1); + mulr_d(rn(t0), rn(t0), r2); + subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_fnmsr_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + negr_d(rn(t0), r1); + mulr_d(rn(t0), rn(t0), r2); + addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void _extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { stxi_l(-8, _FP_REGNO, r1); diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index 91b892a..d2d378f 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -911,6 +911,12 @@ _emit_code(jit_state_t *_jit) name##i##type(rn(node->u.w), rn(node->v.w), \ (jit_float##size##_t *)node->w.n->u.w); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1256,16 +1262,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(ext, _f); case_rr(ld, _f); case_rw(ld, _f); @@ -1369,16 +1369,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case 
jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ext, _d); case_rr(ld, _d); case_rw(ld, _d); diff --git a/lib/jit_arm-swf.c b/lib/jit_arm-swf.c index f63d939..46e4308 100644 --- a/lib/jit_arm-swf.c +++ b/lib/jit_arm-swf.c @@ -168,6 +168,18 @@ static void _swf_fmar_d(jit_state_t*, # define swf_fmsr_d(r0,r1,r2,r3) _swf_fmsr_d(_jit,r0,r1,r2,r3) static void _swf_fmsr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define swf_fnmar_f(r0,r1,r2,r3) _swf_fnmar_f(_jit,r0,r1,r2,r3) +static void _swf_fnmar_f(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define swf_fnmsr_f(r0,r1,r2,r3) _swf_fnmsr_f(_jit,r0,r1,r2,r3) +static void _swf_fnmsr_f(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define swf_fnmar_d(r0,r1,r2,r3) _swf_fnmar_d(_jit,r0,r1,r2,r3) +static void _swf_fnmar_d(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define swf_fnmsr_d(r0,r1,r2,r3) _swf_fnmsr_d(_jit,r0,r1,r2,r3) +static void _swf_fnmsr_d(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); # define swf_addr_f(r0,r1,r2) swf_fff(__addsf3,r0,r1,r2) # define swf_addi_f(r0,r1,i0) swf_fff_(__addsf3,r0,r1,i0) # define swf_addr_d(r0,r1,r2) swf_ddd(__adddf3,r0,r1,r2) @@ -2241,6 +2253,30 @@ _swf_fmsr_f(jit_state_t *_jit, } static void +_swf_fnmar_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + swf_negr_f(rn(t0), r1); + swf_mulr_f(rn(t0), rn(t0), r2); + swf_subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_swf_fnmsr_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + swf_negr_f(rn(t0), r1); + swf_mulr_f(rn(t0), rn(t0), r2); + swf_addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void _swf_fmar_d(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { @@ -2275,6 +2311,30 @@ _swf_fmsr_d(jit_state_t *_jit, } static void +_swf_fnmar_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + swf_negr_d(rn(t0), r1); + swf_mulr_d(rn(t0), rn(t0), r2); + swf_subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_swf_fnmsr_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + swf_negr_d(rn(t0), r1); + swf_mulr_d(rn(t0), rn(t0), r2); + swf_addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void _swf_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { swf_iff(__aeabi_fcmpeq, r0, r1, r2); diff --git a/lib/jit_arm-vfp.c b/lib/jit_arm-vfp.c index dfe43e4..adf6a3b 100644 --- a/lib/jit_arm-vfp.c +++ b/lib/jit_arm-vfp.c @@ -90,6 +90,8 @@ # define ARM_VCVTR_U32_F64 ARM_VCVT|ARM_VCVT_2I|ARM_V_F64 # define ARM_VFMA 0x0ea00a00 # define ARM_VFMS 0x0ea00a40 +# define ARM_VFNMA 0x0e900a00 +# define ARM_VFNMS 0x0e900a40 # define ARM_V_D 0x00400000 # define ARM_V_N 0x00000080 # define ARM_V_Q 0x00000040 @@ -222,6 +224,14 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int); # define VFMS_F32(r0,r1,r2) CC_VFMS_F32(ARM_CC_AL,r0,r1,r2) # define CC_VFMS_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFMS|ARM_V_F64,r0,r1,r2) # define VFMS_F64(r0,r1,r2) CC_VFMS_F64(ARM_CC_AL,r0,r1,r2) +# define CC_VFNMA_F32(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMA,r0,r1,r2) +# define VFNMA_F32(r0,r1,r2) CC_VFNMA_F32(ARM_CC_AL,r0,r1,r2) +# define CC_VFNMA_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMA|ARM_V_F64,r0,r1,r2) +# define VFNMA_F64(r0,r1,r2) CC_VFNMA_F64(ARM_CC_AL,r0,r1,r2) +# define CC_VFNMS_F32(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMS,r0,r1,r2) +# define VFNMS_F32(r0,r1,r2) CC_VFNMS_F32(ARM_CC_AL,r0,r1,r2) +# define CC_VFNMS_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMS|ARM_V_F64,r0,r1,r2) +# define 
VFNMS_F64(r0,r1,r2) CC_VFNMS_F64(ARM_CC_AL,r0,r1,r2) # define CC_VMOV_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VMOV_F,r0,r1) # define VMOV_F32(r0,r1) CC_VMOV_F32(ARM_CC_AL,r0,r1) # define CC_VMOV_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1) @@ -521,6 +531,12 @@ static void _vfp_fmar_f(jit_state_t*, # define vfp_fmsr_f(r0,r1,r2,r3) _vfp_fmsr_f(_jit,r0,r1,r2,r3) static void _vfp_fmsr_f(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define vfp_fnmar_f(r0,r1,r2,r3) _vfp_fnmar_f(_jit,r0,r1,r2,r3) +static void _vfp_fnmar_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define vfp_fnmsr_f(r0,r1,r2,r3) _vfp_fnmsr_f(_jit,r0,r1,r2,r3) +static void _vfp_fnmsr_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define vfp_sqrtr_d(r0,r1) VSQRT_F64(r0,r1) # define vfp_fmar_d(r0,r1,r2,r3) _vfp_fmar_d(_jit,r0,r1,r2,r3) static void _vfp_fmar_d(jit_state_t*, @@ -528,6 +544,12 @@ static void _vfp_fmar_d(jit_state_t*, # define vfp_fmsr_d(r0,r1,r2,r3) _vfp_fmsr_d(_jit,r0,r1,r2,r3) static void _vfp_fmsr_d(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define vfp_fnmar_d(r0,r1,r2,r3) _vfp_fnmar_d(_jit,r0,r1,r2,r3) +static void _vfp_fnmar_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define vfp_fnmsr_d(r0,r1,r2,r3) _vfp_fnmsr_d(_jit,r0,r1,r2,r3) +static void _vfp_fnmsr_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define vfp_addr_f(r0,r1,r2) VADD_F32(r0,r1,r2) # define vfp_addi_f(r0,r1,i0) _vfp_addi_f(_jit,r0,r1,i0) static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); @@ -1490,7 +1512,8 @@ _vfp_fmar_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { jit_int32_t t0; - if (jit_armv7r_p()) { + /* untested */ + if (0 && jit_cpu.vfp >= 4) { if (r0 != r2 && r0 != r3) { vfp_movr_f(r0, r1); VFMA_F32(r0, r2, r3); @@ -1507,7 +1530,7 @@ _vfp_fmar_f(jit_state_t *_jit, if (r0 != r3) { vfp_mulr_f(r0, r1, r2); 
vfp_addr_f(r0, r0, r3); - } + } else { t0 = jit_get_reg(jit_class_fpr); vfp_mulr_f(rn(t0), r1, r2); @@ -1522,7 +1545,8 @@ _vfp_fmsr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { jit_int32_t t0; - if (jit_armv7r_p()) { + /* untested */ + if (0 && jit_cpu.vfp >= 4) { if (r0 != r2 && r0 != r3) { vfp_movr_f(r0, r1); VFMS_F32(r0, r2, r3); @@ -1551,11 +1575,69 @@ _vfp_fmsr_f(jit_state_t *_jit, } static void +_vfp_fnmar_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + /* untested */ + if (0 && jit_cpu.vfp >= 4) { + if (r0 != r2 && r0 != r3) { + vfp_movr_f(r0, r1); + VFNMA_F32(r0, r2, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr); + vfp_movr_f(rn(t0), r1); + VFNMA_F32(rn(t0), r2, r3); + vfp_movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else { + t0 = jit_get_reg(jit_class_fpr); + vfp_negr_f(rn(t0), r1); + vfp_mulr_f(rn(t0), rn(t0), r2); + vfp_subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_vfp_fnmsr_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + /* untested */ + if (0 && jit_cpu.vfp >= 4) { + if (r0 != r2 && r0 != r3) { + vfp_movr_f(r0, r1); + VFNMS_F32(r0, r2, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr); + vfp_movr_f(rn(t0), r1); + VFNMS_F32(rn(t0), r2, r3); + vfp_movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + vfp_negr_f(r0, r0); + } + else { + t0 = jit_get_reg(jit_class_fpr); + vfp_negr_f(rn(t0), r1); + vfp_mulr_f(rn(t0), rn(t0), r2); + vfp_addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void _vfp_fmar_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { jit_int32_t t0; - if (jit_armv7r_p()) { + /* untested */ + if (0 && jit_cpu.vfp >= 4) { if (r0 != r2 && r0 != r3) { vfp_movr_d(r0, r1); VFMA_F64(r0, r2, r3); @@ -1587,7 +1669,8 @@ _vfp_fmsr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) 
{ jit_int32_t t0; - if (jit_armv7r_p()) { + /* untested */ + if (0 && jit_cpu.vfp >= 4) { if (r0 != r2 && r0 != r3) { vfp_movr_d(r0, r1); VFMS_F64(r0, r2, r3); @@ -1615,6 +1698,63 @@ _vfp_fmsr_d(jit_state_t *_jit, } } +static void +_vfp_fnmar_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + /* untested */ + if (0 && jit_cpu.vfp >= 4) { + if (r0 != r2 && r0 != r3) { + vfp_movr_d(r0, r1); + VFNMA_F64(r0, r2, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr); + vfp_movr_d(rn(t0), r1); + VFNMA_F64(rn(t0), r2, r3); + vfp_movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else { + t0 = jit_get_reg(jit_class_fpr); + vfp_negr_d(rn(t0), r1); + vfp_mulr_d(rn(t0), rn(t0), r2); + vfp_subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_vfp_fnmsr_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + /* untested */ + if (0 && jit_cpu.vfp >= 4) { + if (r0 != r2 && r0 != r3) { + vfp_movr_d(r0, r1); + VFNMS_F64(r0, r2, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr); + vfp_movr_d(rn(t0), r1); + VFNMS_F64(rn(t0), r2, r3); + vfp_movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + vfp_negr_d(r0, r0); + } + else { + t0 = jit_get_reg(jit_class_fpr); + vfp_negr_d(rn(t0), r1); + vfp_mulr_d(rn(t0), rn(t0), r2); + vfp_addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + # define fopi(name) \ static void \ _vfp_##name##i_f(jit_state_t *_jit, \ diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 8d39b9a..df6c0e7 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1342,6 +1342,16 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + if (jit_swf_p()) \ + swf_##name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w));\ + else \ + vfp_##name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w));\ + case 
jit_code_##name##i##type: \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1791,24 +1801,10 @@ _emit_code(jit_state_t *_jit) case_vv(abs, _f); case_vv(neg, _f); case_vv(sqrt, _f); - case jit_code_fmar_f: - if (jit_swf_p()) - swf_fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - else - vfp_fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - if (jit_swf_p()) - swf_fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - else - vfp_fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_vv(ext, _f); case_vv(ld, _f); case_vw(ld, _f); @@ -1919,24 +1915,10 @@ _emit_code(jit_state_t *_jit) case_vv(abs, _d); case_vv(neg, _d); case_vv(sqrt, _d); - case jit_code_fmar_d: - if (jit_swf_p()) - swf_fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - else - vfp_fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - if (jit_swf_p()) - swf_fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - else - vfp_fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_vv(ext, _d); case_vv(ld, _d); case_vw(ld, _d); diff --git a/lib/jit_hppa-fpu.c b/lib/jit_hppa-fpu.c index c676884..699d31e 100644 --- a/lib/jit_hppa-fpu.c +++ b/lib/jit_hppa-fpu.c @@ -277,6 +277,12 @@ static void _fmar_f(jit_state_t*, #define fmsr_f(r0,r1,r2,r3) _fmsr_f(_jit,r0,r1,r2,r3) static void _fmsr_f(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +#define fnmar_f(r0,r1,r2,r3) _fnmar_f(_jit,r0,r1,r2,r3) 
+static void _fnmar_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +#define fnmsr_f(r0,r1,r2,r3) _fnmsr_f(_jit,r0,r1,r2,r3) +static void _fnmsr_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define sqrtr_d(r0,r1) FSQRT_D(r1,r0) #define fmar_d(r0,r1,r2,r3) _fmar_d(_jit,r0,r1,r2,r3) static void _fmar_d(jit_state_t*, @@ -284,6 +290,12 @@ static void _fmar_d(jit_state_t*, #define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3) static void _fmsr_d(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +#define fnmar_d(r0,r1,r2,r3) _fnmar_d(_jit,r0,r1,r2,r3) +static void _fnmar_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +#define fnmsr_d(r0,r1,r2,r3) _fnmsr_d(_jit,r0,r1,r2,r3) +static void _fnmsr_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define extr_f(r0,r1) _extr_f(_jit,r0,r1) static void _extr_f(jit_state_t*,jit_int32_t,jit_int32_t); #define extr_d(r0,r1) _extr_d(_jit,r0,r1) @@ -737,7 +749,6 @@ _fmsr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { jit_int32_t reg; -#if 1 if (r0 != r3) { mulr_f(r0, r1, r2); subr_f(r0, r0, r3); @@ -748,12 +759,30 @@ _fmsr_f(jit_state_t *_jit, subr_f(r0, rn(reg), r3); jit_unget_reg(reg); } -#else - reg = jit_get_reg(jit_class_fpr); - negr_f(rn(reg), r3); - fmar_f(r0, r1, r2, rn(reg)); - jit_unget_reg(reg); -#endif +} + +static void +_fnmar_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + negr_f(rn(t0), r1); + mulr_f(rn(t0), rn(t0), r2); + subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_fnmsr_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + negr_f(rn(t0), r1); + mulr_f(rn(t0), rn(t0), r2); + addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); } static void @@ -813,7 +842,6 @@ _fmsr_d(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { jit_int32_t reg; -#if 1 if (r0 != r3) { mulr_d(r0, r1, r2); subr_d(r0, r0, r3); @@ -824,12 +852,30 @@ _fmsr_d(jit_state_t *_jit, subr_d(r0, rn(reg), r3); jit_unget_reg(reg); } -#else - reg = jit_get_reg(jit_class_fpr); - negr_d(rn(reg), r3); - fmar_d(r0, r1, r2, rn(reg)); - jit_unget_reg(reg); -#endif +} + +static void +_fnmar_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + negr_d(rn(t0), r1); + mulr_d(rn(t0), rn(t0), r2); + subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_fnmsr_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + negr_d(rn(t0), r1); + mulr_d(rn(t0), rn(t0), r2); + addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); } static void diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index b62ceb5..e392c6d 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -899,6 +899,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1259,27 +1265,15 @@ _emit_code(jit_state_t *_jit) case_rr(neg, _f); case_rr(neg, _d); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), 
rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rrr(add, _f); case_rrf(add, _f, 32); case_rrr(add, _d); diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c index bd026f5..e29bff7 100644 --- a/lib/jit_ia64-fpu.c +++ b/lib/jit_ia64-fpu.c @@ -455,10 +455,18 @@ static void _movi_w_d(jit_state_t*, jit_int32_t, jit_word_t); static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t); #define fmar_f(r0,r1,r2,r3) FMA_S(r0,r2,r3,r1) #define fmsr_f(r0,r1,r2,r3) FMS_S(r0,r2,r3,r1) +#define fnmar_f(r0,r1,r2,r3) FNMA_S(r0,r2,r3,r1) +#define fnmsr_f(r0,r1,r2,r3) _fnmsr_f(_jit,r0,r1,r2,r3) +static void _fnmsr_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1) static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t); #define fmar_d(r0,r1,r2,r3) FMA_D(r0,r2,r3,r1) #define fmsr_d(r0,r1,r2,r3) FMS_D(r0,r2,r3,r1) +#define fnmar_d(r0,r1,r2,r3) FNMA_D(r0,r2,r3,r1) +#define fnmsr_d(r0,r1,r2,r3) _fnmsr_d(_jit,r0,r1,r2,r3) +static void _fnmsr_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define extr_f_d(r0,r1) FNORM_D(r0,r1) #define extr_d_f(r0,r1) FNORM_S(r0,r1) #define extr_f(r0,r1) _extr_f(_jit,r0,r1) @@ -1565,6 +1573,14 @@ _sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void +_fnmsr_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + fmsr_f(r0, r1, r2, r3); + negr_f(r0, r0); +} + +static void _sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { movr_d(GR_8, r1); @@ -1572,6 +1588,14 @@ _sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) MOVF(r0, GR_8); } +static void +_fnmsr_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + 
fmsr_d(r0, r1, r2, r3); + negr_d(r0, r0); +} + static jit_word_t _bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index cf3e5b2..d385e8d 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -1048,6 +1048,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ @@ -1409,16 +1415,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(ld, _f); case_rw(ld, _f); case_rrr(ldx, _f); @@ -1514,16 +1514,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ld, _d); case_rw(ld, _d); case_rrr(ldx, _d); diff --git a/lib/jit_loongarch-fpu.c b/lib/jit_loongarch-fpu.c index 8559791..a3fd02a 100644 --- a/lib/jit_loongarch-fpu.c +++ b/lib/jit_loongarch-fpu.c @@ -217,6 +217,8 @@ static void _divi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t); # 
define sqrtr_f(r0, r1) FSQRT_S(r0, r1) # define fmar_f(r0, r1, r2, r3) FMADD_S(r0, r1, r2, r3) # define fmsr_f(r0, r1, r2, r3) FMSUB_S(r0, r1, r2, r3) +# define fnmar_f(r0, r1, r2, r3) FNMADD_S(r0, r1, r2, r3) +# define fnmsr_f(r0, r1, r2, r3) FNMSUB_S(r0, r1, r2, r3) # define extr_f(r0, r1) _extr_f(_jit, r0, r1) static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t); # define ldr_f(r0, r1) FLD_S(r0, r1, 0) @@ -379,6 +381,8 @@ static void _divi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); # define sqrtr_d(r0, r1) FSQRT_D(r0, r1) # define fmar_d(r0, r1, r2, r3) FMADD_D(r0, r1, r2, r3) # define fmsr_d(r0, r1, r2, r3) FMSUB_D(r0, r1, r2, r3) +# define fnmar_d(r0, r1, r2, r3) FNMADD_D(r0, r1, r2, r3) +# define fnmsr_d(r0, r1, r2, r3) FNMSUB_D(r0, r1, r2, r3) # define extr_d(r0, r1) _extr_d(_jit, r0, r1) static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define ldr_d(r0, r1) FLD_D(r0, r1, 0) diff --git a/lib/jit_loongarch.c b/lib/jit_loongarch.c index bdf98e5..b892563 100644 --- a/lib/jit_loongarch.c +++ b/lib/jit_loongarch.c @@ -968,6 +968,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ @@ -1339,16 +1345,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); 
case_rr(ext, _f); case_rr(ld, _f); case_rw(ld, _f); @@ -1444,16 +1444,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ext, _d); case_rr(ld, _d); case_rw(ld, _d); diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 24afbb2..78871a9 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -1447,6 +1447,10 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask, case MIPS_MADD_fmt_D: case MIPS_MSUB_fmt_S: case MIPS_MSUB_fmt_D: + case MIPS_NMADD_fmt_S: + case MIPS_NMADD_fmt_D: + case MIPS_NMSUB_fmt_S: + case MIPS_NMSUB_fmt_D: assert(!jit_mips6_p()); if (mask & jit_class_gpr) regs[0] = regs[1] = regs[2] = 0; diff --git a/lib/jit_mips-fpu.c b/lib/jit_mips-fpu.c index 9c4b620..ca74956 100644 --- a/lib/jit_mips-fpu.c +++ b/lib/jit_mips-fpu.c @@ -150,8 +150,14 @@ # define MADD_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MADD_fmt_D) # define MSUB_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MSUB_fmt_S) # define MSUB_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MSUB_fmt_D) +# define NMADD_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMADD_fmt_S) +# define NMADD_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMADD_fmt_D) +# define NMSUB_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMSUB_fmt_S) +# define NMSUB_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMSUB_fmt_D) +/* fd = fd + (fs * ft) */ # define MADDF_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MADDF) # define MADDF_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MADDF) +/* fd = fd - (fs * ft) */ # define MSUBF_S(fd,fs,ft) 
hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MSUBF) # define MSUBF_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MSUBF) # define LWC1(rt, of, rb) hrri(MIPS_LWC1, rb, rt, of) @@ -319,9 +325,9 @@ static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_f(i0, r0, r1) _stxi_f(_jit, i0, r0, r1) static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define unstr_x(r0, r1, i0) _unstr_x(_jit, r0, r1, i0) +# define unstr_x(r0, r1, i0) _unstr_x(_jit, r0, r1, i0) static void _unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -#define unsti_x(i0, r0, i1) _unsti_x(_jit, i0, r0, i1) +# define unsti_x(i0, r0, i1) _unsti_x(_jit, i0, r0, i1) static void _unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); # define fmar_f(r0, r1, r2, r3) _fmar_f(_jit, r0, r1, r2, r3) static void _fmar_f(jit_state_t*, @@ -329,6 +335,12 @@ static void _fmar_f(jit_state_t*, # define fmsr_f(r0, r1, r2, r3) _fmsr_f(_jit, r0, r1, r2, r3) static void _fmsr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fnmar_f(r0, r1, r2, r3) _fnmar_f(_jit, r0, r1, r2, r3) +static void _fnmar_f(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fnmsr_f(r0, r1, r2, r3) _fnmsr_f(_jit, r0, r1, r2, r3) +static void _fnmsr_f(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); # define movr_f(r0, r1) _movr_f(_jit, r0, r1) static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t); # define movi_f(r0, i0) _movi_f(_jit, r0, i0) @@ -382,6 +394,12 @@ static void _fmar_d(jit_state_t*, # define fmsr_d(r0, r1, r2, r3) _fmsr_d(_jit, r0, r1, r2, r3) static void _fmsr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fnmar_d(r0, r1, r2, r3) _fnmar_d(_jit, r0, r1, r2, r3) +static void _fnmar_d(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fnmsr_d(r0, r1, r2, r3) _fnmsr_d(_jit, r0, 
r1, r2, r3) +static void _fnmsr_d(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); # define movr_d(r0, r1) _movr_d(_jit, r0, r1) static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t); # define movi_d(r0, i0) _movi_d(_jit, r0, i0) @@ -961,7 +979,6 @@ _fmsr_f(jit_state_t *_jit, jit_int32_t t0; if (jit_mips2_p()) { if (jit_mips6_p()) { - /* fd = fd - (fs * ft) */ if (r0 == r3) MSUBF_S(r0, r2, r1); else { @@ -991,6 +1008,65 @@ _fmsr_f(jit_state_t *_jit, } static void +_fnmar_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MADDF_S(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_f(rn(t0), r3); + MADDF_S(rn(t0), r2, r1); + movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + negr_f(r0, r0); + } + else + NMADD_S(r0, r3, r2, r1); + } + else { + t0 = jit_get_reg(jit_class_fpr); + negr_f(rn(t0), r1); + mulr_f(rn(t0), rn(t0), r2); + subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_fnmsr_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MSUBF_S(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_f(rn(t0), r3); + MSUBF_S(rn(t0), r2, r1); + movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else + NMSUB_S(r0, r3, r2, r1); + } + else { + t0 = jit_get_reg(jit_class_fpr); + negr_f(rn(t0), r1); + mulr_f(rn(t0), rn(t0), r2); + addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (r0 != r1) @@ -1347,7 +1423,6 @@ _fmsr_d(jit_state_t *_jit, jit_int32_t t0; if (jit_mips2_p()) { if (jit_mips6_p()) { - /* fd = fd - (fs * ft) */ if (r0 == r3) MSUBF_D(r0, r2, r1); else { @@ -1377,6 +1452,65 @@ _fmsr_d(jit_state_t *_jit, } static void +_fnmar_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t 
r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MADDF_D(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_d(rn(t0), r3); + MADDF_D(rn(t0), r2, r1); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + negr_d(r0, r0); + } + else + NMADD_D(r0, r3, r2, r1); + } + else { + t0 = jit_get_reg(jit_class_fpr); + negr_d(rn(t0), r1); + mulr_d(rn(t0), rn(t0), r2); + subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_fnmsr_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MSUBF_D(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_d(rn(t0), r3); + MSUBF_D(rn(t0), r2, r1); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else + NMSUB_D(r0, r3, r2, r1); + } + else { + t0 = jit_get_reg(jit_class_fpr); + negr_d(rn(t0), r1); + mulr_d(rn(t0), rn(t0), r2); + addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (r0 != r1) diff --git a/lib/jit_mips.c b/lib/jit_mips.c index 17fd531..e49307d 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -1434,6 +1434,12 @@ _emit_code(jit_state_t *_jit) name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), node->w.w); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrf(name, type, size) \ case jit_code_##name##i##type: \ assert(node->flag & jit_flag_data); \ @@ -1800,16 +1806,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - 
rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(ext, _f); case_rr(ld, _f); case_rw(ld, _f); @@ -1905,16 +1905,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ext, _d); case_rr(ld, _d); case_rw(ld, _d); diff --git a/lib/jit_names.c b/lib/jit_names.c index a6bb023..eba8e30 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -291,4 +291,8 @@ static char *code_name[] = { "fmsr_f", "fmsi_f", "fmar_d", "fmai_d", "fmsr_d", "fmsi_d", + "fnmar_f", "fnmai_f", + "fnmsr_f", "fnmsi_f", + "fnmar_d", "fnmai_d", + "fnmsr_d", "fnmsi_d", }; diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c index ab066a8..5366f05 100644 --- a/lib/jit_ppc-fpu.c +++ b/lib/jit_ppc-fpu.c @@ -164,6 +164,10 @@ static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t); # define fmar_d(r0,r1,r2,r3) FMADD(r0,r1,r3,r2) # define fmsr_f(r0,r1,r2,r3) FMSUBS(r0,r1,r3,r2) # define fmsr_d(r0,r1,r2,r3) FMSUB(r0,r1,r3,r2) +# define fnmar_f(r0,r1,r2,r3) FNMADDS(r0,r1,r3,r2) +# define fnmar_d(r0,r1,r2,r3) FNMADD(r0,r1,r3,r2) +# define fnmsr_f(r0,r1,r2,r3) FNMSUBS(r0,r1,r3,r2) +# define fnmsr_d(r0,r1,r2,r3) FNMSUB(r0,r1,r3,r2) # define addr_f(r0,r1,r2) FADDS(r0,r1,r2) # define addr_d(r0,r1,r2) FADD(r0,r1,r2) # define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 8653b57..32dda20 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -1220,6 +1220,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break 
+#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ @@ -1583,16 +1589,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rrr(add, _f); case_rrf(add, _f, 32); case_rrr(sub, _f); @@ -1692,16 +1692,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rrr(add, _d); case_rrf(add, _d, 64); case_rrr(sub, _d); diff --git a/lib/jit_riscv-fpu.c b/lib/jit_riscv-fpu.c index 68ecf36..bfe64fc 100644 --- a/lib/jit_riscv-fpu.c +++ b/lib/jit_riscv-fpu.c @@ -129,6 +129,8 @@ static void _divi_f(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float32_t); # define sqrtr_f(r0, r1) FSQRT_S(r0, r1) # define fmar_f(r0, r1, r2, r3) FMADD_S(r0, r1, r2, r3) # define fmsr_f(r0, r1, r2, r3) FMSUB_S(r0, r1, r2, r3) +# define fnmar_f(r0, r1, r2, r3) FNMADD_S(r0, r1, r2, r3) +# define fnmsr_f(r0, r1, r2, r3) FNMSUB_S(r0, r1, r2, r3) # define extr_f(r0, r1) FCVT_S_L(r0, r1) # define ldr_f(r0, r1) FLW(r0, r1, 0) # define ldi_f(r0, im) _ldi_f(_jit, r0, im) @@ 
-287,6 +289,8 @@ static void _divi_d(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float64_t); # define sqrtr_d(r0, r1) FSQRT_D(r0, r1) # define fmar_d(r0, r1, r2, r3) FMADD_D(r0, r1, r2, r3) # define fmsr_d(r0, r1, r2, r3) FMSUB_D(r0, r1, r2, r3) +# define fnmar_d(r0, r1, r2, r3) FNMADD_D(r0, r1, r2, r3) +# define fnmsr_d(r0, r1, r2, r3) FNMSUB_D(r0, r1, r2, r3) # define extr_d(r0, r1) FCVT_D_L(r0, r1) # define ldr_d(r0, r1) FLD(r0, r1, 0) # define ldi_d(r0, im) _ldi_d(_jit, r0, im) diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c index a923ef0..2a399a9 100644 --- a/lib/jit_riscv.c +++ b/lib/jit_riscv.c @@ -1020,6 +1020,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ @@ -1402,16 +1408,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(ext, _f); case_rr(ld, _f); case_rw(ld, _f); @@ -1507,16 +1507,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, 
_d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ext, _d); case_rr(ld, _d); case_rw(ld, _d); diff --git a/lib/jit_s390-fpu.c b/lib/jit_s390-fpu.c index 9d6f350..db848d9 100644 --- a/lib/jit_s390-fpu.c +++ b/lib/jit_s390-fpu.c @@ -359,6 +359,12 @@ static void _fmar_f(jit_state_t*, # define fmsr_f(r0,r1,r2,r3) _fmsr_f(_jit,r0,r1,r2,r3) static void _fmsr_f(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fnmar_f(r0,r1,r2,r3) _fnmar_f(_jit,r0,r1,r2,r3) +static void _fnmar_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fnmsr_f(r0,r1,r2,r3) _fnmsr_f(_jit,r0,r1,r2,r3) +static void _fnmsr_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define sqrtr_d(r0,r1) SQDBR(r0,r1) # define fmar_d(r0,r1,r2,r3) _fmar_d(_jit,r0,r1,r2,r3) static void _fmar_d(jit_state_t*, @@ -366,6 +372,12 @@ static void _fmar_d(jit_state_t*, # define fmsr_d(r0,r1,r2,r3) _fmsr_d(_jit,r0,r1,r2,r3) static void _fmsr_d(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fnmar_d(r0,r1,r2,r3) _fnmar_d(_jit,r0,r1,r2,r3) +static void _fnmar_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fnmsr_d(r0,r1,r2,r3) _fnmsr_d(_jit,r0,r1,r2,r3) +static void _fnmsr_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define truncr_f_i(r0,r1) CFEBR(r0,RND_ZERO,r1) # define truncr_d_i(r0,r1) CFDBR(r0,RND_ZERO,r1) # if __WORDSIZE == 64 @@ -1012,6 +1024,40 @@ _fmsr_f(jit_state_t* _jit, } static void +_fnmar_f(jit_state_t* _jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + if (r0 == r3) { + MAER(r0, r2, r1); + negr_f(r0, r0); + } + else { + t0 = jit_get_reg(jit_class_fpr); + movr_f(rn(t0), r3); + MAER(rn(t0), r2, r1); + negr_f(r0, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_fnmsr_f(jit_state_t* _jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + if (r0 == r3) { + MSER(r0, r2, r1); + negr_f(r0, r0); + } + else { + 
t0 = jit_get_reg(jit_class_fpr); + movr_f(rn(t0), r3); + MSER(rn(t0), r2, r1); + negr_f(r0, rn(t0)); + jit_unget_reg(t0); + } +} + +static void _fmar_d(jit_state_t* _jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { @@ -1019,9 +1065,9 @@ _fmar_d(jit_state_t* _jit, MADR(r0, r2, r1); else { t0 = jit_get_reg(jit_class_fpr); - movr_f(rn(t0), r3); + movr_d(rn(t0), r3); MADR(rn(t0), r2, r1); - movr_f(r0, rn(t0)); + movr_d(r0, rn(t0)); jit_unget_reg(t0); } } @@ -1034,9 +1080,43 @@ _fmsr_d(jit_state_t* _jit, MSDR(r0, r2, r1); else { t0 = jit_get_reg(jit_class_fpr); + movr_d(rn(t0), r3); + MSDR(rn(t0), r2, r1); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_fnmar_d(jit_state_t* _jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + if (r0 == r3) { + MADR(r0, r2, r1); + negr_d(r0, r0); + } + else { + t0 = jit_get_reg(jit_class_fpr); movr_f(rn(t0), r3); + MADR(rn(t0), r2, r1); + negr_d(r0, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_fnmsr_d(jit_state_t* _jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + if (r0 == r3) { + MSDR(r0, r2, r1); + negr_d(r0, r0); + } + else { + t0 = jit_get_reg(jit_class_fpr); + movr_d(rn(t0), r3); MSDR(rn(t0), r2, r1); - movr_f(r0, rn(t0)); + negr_d(r0, rn(t0)); jit_unget_reg(t0); } } diff --git a/lib/jit_s390.c b/lib/jit_s390.c index 0b31a9a..858ea30 100644 --- a/lib/jit_s390.c +++ b/lib/jit_s390.c @@ -978,6 +978,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ @@ -1379,16 +1385,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); 
case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(ext, _f); case_rr(ld, _f); case_rw(ld, _f); @@ -1488,16 +1488,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ext, _d); case_rr(ld, _d); case_rw(ld, _d); diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c index 474815d..3cdb870 100644 --- a/lib/jit_sparc-fpu.c +++ b/lib/jit_sparc-fpu.c @@ -130,14 +130,6 @@ _f4f(jit_state_t*,jit_int32_t,jit_int32_t, # define FSQRTS(rs2, rd) FPop1(rd, 0, 41, rs2) # define FSQRTD(rs2, rd) FPop1(rd, 0, 42, rs2) # define FSQRTQ(rs2, rd) FPop1(rd, 0, 43, rs2) -# define FMADDS(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMADDS, rs2) -# define FMADDD(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMADDD, rs2) -# define FMSUBS(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMSUBS, rs2) -# define FMSUBD(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMSUBD, rs2) -# define FNMSUBS(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMSUBS,rs2) -# define FNMSUBD(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMSUBD,rs2) -# define FNMADDS(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMADDS,rs2) -# define FNMADDD(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMADDD,rs2) # define SPARC_FADDS 65 # define SPARC_FADDD 66 # define SPARC_FADDQ 67 @@ -160,6 +152,14 @@ 
_f4f(jit_state_t*,jit_int32_t,jit_int32_t, # define SPARC_FNMSUBD 10 # define SPARC_FNMADDS 13 # define SPARC_FNMADDD 14 +# define FMADDS(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMADDS, rs2) +# define FMADDD(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMADDD, rs2) +# define FMSUBS(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMSUBS, rs2) +# define FMSUBD(rs1, rs2, rs3, rd) f4f(rd, 55, rs1, rs3, SPARC_FMSUBD, rs2) +# define FNMSUBS(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMSUBS,rs2) +# define FNMSUBD(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMSUBD,rs2) +# define FNMADDS(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMADDS,rs2) +# define FNMADDD(rs1, rs2, rs3,rd) f4f(rd, 55, rs1, rs3, SPARC_FNMADDD,rs2) # define FADDS(rs1, rs2, rd) FPop1(rd, rs1, SPARC_FADDS, rs2) # define FADDD(rs1, rs2, rd) FPop1(rd, rs1, SPARC_FADDD, rs2) # define FADDQ(rs1, rs2, rd) FPop1(rd, rs1, SPARC_FADDQ, rs2) @@ -236,6 +236,10 @@ static void _sqrtr_f(jit_state_t*, jit_int32_t, jit_int32_t); # define fmsr_f(r0, r1, r2, r3) FMSUBS(r1, r2, r3, r0) # define fmar_d(r0, r1, r2, r3) FMADDD(r1, r2, r3, r0) # define fmsr_d(r0, r1, r2, r3) FMSUBD(r1, r2, r3, r0) +# define fnmar_f(r0, r1, r2, r3) FNMADDS(r1, r2, r3, r0) +# define fnmsr_f(r0, r1, r2, r3) FNMSUBS(r1, r2, r3, r0) +# define fnmar_d(r0, r1, r2, r3) FNMADDD(r1, r2, r3, r0) +# define fnmsr_d(r0, r1, r2, r3) FNMSUBD(r1, r2, r3, r0) # else # define fop3f(op, r0, r1, r2, r3) _fop3f(_jit, op, r0, r1, r2, r3) static void _fop3f(jit_state_t*, jit_int32_t, jit_int32_t, @@ -244,6 +248,10 @@ static void _fop3f(jit_state_t*, jit_int32_t, jit_int32_t, # define fmsr_f(r0, r1, r2, r3) fop3f(SPARC_FMSUBS, r0, r1, r2, r3) # define fmar_d(r0, r1, r2, r3) fop3f(SPARC_FMADDD, r0, r1, r2, r3) # define fmsr_d(r0, r1, r2, r3) fop3f(SPARC_FMSUBD, r0, r1, r2, r3) +# define fnmar_f(r0, r1, r2, r3) fop3f(SPARC_FNMADDS, r0, r1, r2, r3) +# define fnmsr_f(r0, r1, r2, r3) fop3f(SPARC_FNMSUBS, r0, r1, r2, r3) +# define fnmar_d(r0, r1, r2, r3) 
fop3f(SPARC_FNMADDD, r0, r1, r2, r3) +# define fnmsr_d(r0, r1, r2, r3) fop3f(SPARC_FNMSUBD, r0, r1, r2, r3) # endif # define extr_d(r0, r1) _extr_d(_jit, r0, r1) static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index 5d4ce85..1acf636 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -1292,6 +1292,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), \ @@ -1680,16 +1686,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); - case jit_code_fmar_f: - fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_f: - break; - case jit_code_fmsr_f: - fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_f: - break; + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(ext, _f); case_rr(ext, _d_f); case_rrr(lt, _f); @@ -1789,16 +1789,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); - case jit_code_fmar_d: - fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmai_d: - break; - case jit_code_fmsr_d: - fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - case jit_code_fmsi_d: - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ext, _d); case_rr(ext, _f_d); case_rrr(lt, _d); diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c index 8e4422f..930efed 100644 --- a/lib/jit_x86-sse.c +++ b/lib/jit_x86-sse.c @@ -178,6 +178,18 @@ static void 
_sse_fmsr_f(jit_state_t*, # define sse_fmsr_d(r0, r1, r2, r3) _sse_fmsr_d(_jit, r0, r1, r2, r3) static void _sse_fmsr_d(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_fnmar_f(r0, r1, r2, r3) _sse_fnmar_f(_jit, r0, r1, r2, r3) +static void _sse_fnmar_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_fnmar_d(r0, r1, r2, r3) _sse_fnmar_d(_jit, r0, r1, r2, r3) +static void _sse_fnmar_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_fnmsr_f(r0, r1, r2, r3) _sse_fnmsr_f(_jit, r0, r1, r2, r3) +static void _sse_fnmsr_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_fnmsr_d(r0, r1, r2, r3) _sse_fnmsr_d(_jit, r0, r1, r2, r3) +static void _sse_fnmsr_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2) # define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2) static void @@ -1001,6 +1013,114 @@ _sse_fmsr_d(jit_state_t *_jit, } static void +_sse_fnmar_f(jit_state_t *_jit, /* single precision: r0 = -(r1 * r2) - r3 */ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_cpu.fma) { + if (r0 != r2 && r0 != r3) { /* r0 is the negate's destination, so use it directly only when it aliases neither r2 nor r3 */ + sse_negr_f(r0, r1); + vfmsub213ss(r0, r2, r3); + } + else { /* r0 aliases r2 or r3: compute in a scratch register, then move */ + t0 = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_negr_f(rn(t0), r1); + vfmsub213ss(rn(t0), r2, r3); + sse_movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else { /* jit_cpu.fma not set: negate, multiply, subtract through a scratch register */ + t0 = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_negr_f(rn(t0), r1); + sse_mulr_f(rn(t0), rn(t0), r2); + sse_subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_sse_fnmar_d(jit_state_t *_jit, /* double precision: r0 = -(r1 * r2) - r3 */ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_cpu.fma) { + if (r0 != r2 && r0 != r3) { /* same aliasing rule as the single-precision variant */ + sse_negr_d(r0, r1); + vfmsub213sd(r0, r2, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_negr_d(rn(t0), r1); + vfmsub213sd(rn(t0), r2, r3); +
sse_movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else { + t0 = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_negr_d(rn(t0), r1); + sse_mulr_d(rn(t0), rn(t0), r2); + sse_subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_sse_fnmsr_f(jit_state_t *_jit, /* single precision: r0 = -(r1 * r2) + r3 */ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_cpu.fma) { + if (r0 != r2 && r0 != r3) { /* r0 is written by the negate before r2/r3 are read */ + sse_negr_f(r0, r1); + vfmadd213ss(r0, r2, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_negr_f(rn(t0), r1); + vfmadd213ss(rn(t0), r2, r3); + sse_movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else { /* jit_cpu.fma not set: negate, multiply, add through a scratch register */ + t0 = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_negr_f(rn(t0), r1); + sse_mulr_f(rn(t0), rn(t0), r2); + sse_addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_sse_fnmsr_d(jit_state_t *_jit, /* double precision: r0 = -(r1 * r2) + r3 */ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_cpu.fma) { + if (r0 != r2 && r0 != r3) { + sse_negr_d(r0, r1); + vfmadd213sd(r0, r2, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_negr_d(rn(t0), r1); + vfmadd213sd(rn(t0), r2, r3); + sse_movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else { + t0 = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_negr_d(rn(t0), r1); + sse_mulr_d(rn(t0), rn(t0), r2); + sse_addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void _ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c index a64287e..61040da 100644 --- a/lib/jit_x86-x87.c +++ b/lib/jit_x86-x87.c @@ -132,6 +132,18 @@ static void _x87_fmsr_f(jit_state_t*, # define x87_fmsr_d(r0, r1, r2, r3) _x87_fmsr_d(_jit, r0, r1, r2, r3) static void _x87_fmsr_d(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define x87_fnmar_f(r0, r1, r2, r3) _x87_fnmar_f(_jit, r0, r1, r2, r3) +static void _x87_fnmar_f(jit_state_t*, +
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define x87_fnmar_d(r0, r1, r2, r3) _x87_fnmar_d(_jit, r0, r1, r2, r3) +static void _x87_fnmar_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define x87_fnmsr_f(r0, r1, r2, r3) _x87_fnmsr_f(_jit, r0, r1, r2, r3) +static void _x87_fnmsr_f(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define x87_fnmsr_d(r0, r1, r2, r3) _x87_fnmsr_d(_jit, r0, r1, r2, r3) +static void _x87_fnmsr_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define x87_truncr_f_i(r0, r1) _x87_truncr_d_i(_jit, r0, r1) # define x87_truncr_d_i(r0, r1) _x87_truncr_d_i(_jit, r0, r1) static void _x87_truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t); @@ -773,6 +785,54 @@ _x87_fmsr_d(jit_state_t *_jit, } static void +_x87_fnmar_f(jit_state_t *_jit, /* single precision: r0 = -(r1 * r2) - r3 */ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); /* scratch register keeps r1, r2 and r3 unmodified */ + x87_negr_f(rn(t0), r1); + x87_mulr_f(rn(t0), rn(t0), r2); + x87_subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_x87_fnmar_d(jit_state_t *_jit, /* double precision: r0 = -(r1 * r2) - r3 */ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + x87_negr_d(rn(t0), r1); + x87_mulr_d(rn(t0), rn(t0), r2); + x87_subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_x87_fnmsr_f(jit_state_t *_jit, /* single precision: r0 = -(r1 * r2) + r3 */ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + x87_negr_f(rn(t0), r1); + x87_mulr_f(rn(t0), rn(t0), r2); + x87_addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void +_x87_fnmsr_d(jit_state_t *_jit, /* double precision: r0 = -(r1 * r2) + r3 */ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_fpr); + x87_negr_d(rn(t0), r1); + x87_mulr_d(rn(t0), rn(t0), r2); + x87_addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); +} + +static void _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0,
jit_int32_t r1) { CHECK_CVT_OFFSET(); diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 8f49e6d..fb5f3ca 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -1557,6 +1557,27 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) /* four-register op: x87 emitter only when all four registers are x87 */ \ + case jit_code_##name##r##type: \ + if (jit_x87_reg_p(node->u.w) && \ + jit_x87_reg_p(node->v.q.l) && \ + jit_x87_reg_p(node->v.q.h) && \ + jit_x87_reg_p(node->w.w)) \ + x87_##name##r##type(rn(node->u.w), \ + rn(node->v.q.l), \ + rn(node->v.q.h), \ + rn(node->w.w)); \ + else { /* otherwise all four must be SSE registers; a mix trips the assert */ \ + assert(jit_sse_reg_p(node->u.w) && \ + jit_sse_reg_p(node->v.q.l) && \ + jit_sse_reg_p(node->v.q.h) && \ + jit_sse_reg_p(node->w.w)); \ + sse_##name##r##type(rn(node->u.w), \ + rn(node->v.q.l), \ + rn(node->v.q.h), \ + rn(node->w.w)); \ + } \ + break; #define case_frr(name, type) \ case jit_code_##name##r##type: \ if (jit_x87_reg_p(node->u.w)) \ @@ -2038,36 +2059,10 @@ _emit_code(jit_state_t *_jit) case_ff(abs, _f); case_ff(neg, _f); case_ff(sqrt, _f); - case jit_code_fmar_f: - if (jit_x87_reg_p(node->u.w) && - jit_x87_reg_p(node->v.q.l) && - jit_x87_reg_p(node->v.q.h) && - jit_x87_reg_p(node->w.w)) - x87_fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - else { - assert(jit_sse_reg_p(node->u.w) && - jit_sse_reg_p(node->v.q.l) && - jit_sse_reg_p(node->v.q.h) && - jit_sse_reg_p(node->w.w)); - sse_fmar_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - } - break; - case jit_code_fmsr_f: - if (jit_x87_reg_p(node->u.w) && jit_x87_reg_p(node->v.q.l) && - jit_x87_reg_p(node->v.q.h) && jit_x87_reg_p(node->w.w)) - x87_fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - else { - assert(jit_sse_reg_p(node->u.w) && - jit_sse_reg_p(node->v.q.l) && - jit_sse_reg_p(node->v.q.h) && - jit_sse_reg_p(node->w.w)); - sse_fmsr_f(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - } - break; +
case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_fr(ext, _f); case_fr(ext, _d_f); case_rff(lt, _f); @@ -2191,36 +2186,10 @@ _emit_code(jit_state_t *_jit) case_ff(abs, _d); case_ff(neg, _d); case_ff(sqrt, _d); - case jit_code_fmar_d: - if (jit_x87_reg_p(node->u.w) && - jit_x87_reg_p(node->v.q.l) && - jit_x87_reg_p(node->v.q.h) && - jit_x87_reg_p(node->w.w)) - x87_fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - else { - assert(jit_sse_reg_p(node->u.w) && - jit_sse_reg_p(node->v.q.l) && - jit_sse_reg_p(node->v.q.h) && - jit_sse_reg_p(node->w.w)); - sse_fmar_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - } - break; - case jit_code_fmsr_d: - if (jit_x87_reg_p(node->u.w) && jit_x87_reg_p(node->v.q.l) && - jit_x87_reg_p(node->v.q.h) && jit_x87_reg_p(node->w.w)) - x87_fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - else { - assert(jit_sse_reg_p(node->u.w) && - jit_sse_reg_p(node->v.q.l) && - jit_sse_reg_p(node->v.q.h) && - jit_sse_reg_p(node->w.w)); - sse_fmsr_d(rn(node->u.w), rn(node->v.q.l), - rn(node->v.q.h), rn(node->w.w)); - } - break; + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_fr(ext, _d); case_fr(ext, _f_d); case_rff(lt, _d); @@ -2568,6 +2537,8 @@ _emit_code(jit_state_t *_jit) case jit_code_absi_d: case jit_code_sqrti_d: case jit_code_fmai_f: case jit_code_fmsi_f: case jit_code_fmai_d: case jit_code_fmsi_d: + case jit_code_fnmai_f: case jit_code_fnmsi_f: + case jit_code_fnmai_d: case jit_code_fnmsi_d: break; case jit_code_retval_f: #if __X32 diff --git a/lib/lightning.c b/lib/lightning.c index 19163ba..95502e5 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1714,14 +1714,18 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) break; case jit_code_fmar_f: case jit_code_fmar_d: case jit_code_fmsr_f: case jit_code_fmsr_d: + case jit_code_fnmar_f: case jit_code_fnmar_d: + case jit_code_fnmsr_f:
case jit_code_fnmsr_d: mask = jit_cc_a0_reg|jit_cc_a0_chg| jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_reg; break; case jit_code_fmai_f: case jit_code_fmsi_f: + case jit_code_fnmai_f: case jit_code_fnmsi_f: mask = jit_cc_a0_reg|jit_cc_a0_chg| jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_flt; break; case jit_code_fmai_d: case jit_code_fmsi_d: + case jit_code_fnmai_d: case jit_code_fnmsi_d: mask = jit_cc_a0_reg|jit_cc_a0_chg| jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl; break; @@ -4607,6 +4611,44 @@ _jit_fmsi_f(jit_state_t *_jit, } void +_jit_fnmai_f(jit_state_t *_jit, /* u = -(v * w) - x, with x a single-precision immediate */ + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x) +{ + jit_int32_t y; + jit_inc_synth_wqf(fnmai_f, u, v, w, x); /* name passed here must be the op being expanded */ + if (u != v && u != w) { /* u may hold x only when it aliases neither multiplicand */ + jit_movi_f(u, x); + jit_fnmar_f(u, v, w, u); + } + else { /* otherwise x is loaded into a scratch register */ + y = jit_get_reg(jit_class_fpr); + jit_movi_f(y, x); + jit_fnmar_f(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fnmsi_f(jit_state_t *_jit, /* u = -(v * w) + x, with x a single-precision immediate */ + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x) +{ + jit_int32_t y; + jit_inc_synth_wqf(fnmsi_f, u, v, w, x); /* name passed here must be the op being expanded */ + if (u != v && u != w) { /* u may hold x only when it aliases neither multiplicand */ + jit_movi_f(u, x); + jit_fnmsr_f(u, v, w, u); + } + else { /* otherwise x is loaded into a scratch register */ + y = jit_get_reg(jit_class_fpr); + jit_movi_f(y, x); + jit_fnmsr_f(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void _jit_negi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v) { jit_inc_synth_wd(negi_d, u, v); @@ -4671,6 +4713,44 @@ _jit_fmsi_d(jit_state_t *_jit, jit_dec_synth(); } +void +_jit_fnmai_d(jit_state_t *_jit, /* u = -(v * w) - x, with x a double-precision immediate */ + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x) +{ + jit_int32_t y; + jit_inc_synth_wqd(fnmai_d, u, v, w, x); /* name passed here must be the op being expanded */ + if (u != v && u != w) { /* u may hold x only when it aliases neither multiplicand */ + jit_movi_d(u, x); + jit_fnmar_d(u, v, w, u); + } + else { /* otherwise x is loaded into a scratch register */ + y = jit_get_reg(jit_class_fpr); + jit_movi_d(y, x); + jit_fnmar_d(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fnmsi_d(jit_state_t *_jit, /* u = -(v * w) + x, with x a double-precision immediate */ + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x) +{ + jit_int32_t y; + jit_inc_synth_wqd(fnmsi_d, u, v, w, x); /* name passed here must be the op being expanded */ + if (u != v && u != w) { +
jit_movi_d(u, x); + jit_fnmsr_d(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_d(y, x); + jit_fnmsr_d(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + static void _cloi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { |