diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index fd25f7f..2983419 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -520,6 +520,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define subxr(r0,r1,r2) SBCS(r0,r1,r2) # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) MUL(r0,r1,r2) # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1122,6 +1124,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_aarch64-fpu.c b/lib/jit_aarch64-fpu.c index ad20714..ea8c1cd 100644 --- a/lib/jit_aarch64-fpu.c +++ b/lib/jit_aarch64-fpu.c @@ -86,6 +86,9 @@ static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); # define subr_f(r0,r1,r2) FSUBS(r0,r1,r2) # define subi_f(r0,r1,i0) _subi_f(_jit,r0,r1,i0) static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define rsbr_f(r0, r1, r2) subr_f(r0, r2, r1) +# define rsbi_f(r0, r1, i0) _rsbi_f(_jit, r0, r1, i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); # define mulr_f(r0,r1,r2) FMULS(r0,r1,r2) # define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); @@ -198,6 +201,9 @@ static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define subr_d(r0,r1,r2) FSUBD(r0,r1,r2) # define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) +# define rsbi_d(r0, r1, i0) _rsbi_d(_jit, r0, r1, i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define mulr_d(r0,r1,r2) FMULD(r0,r1,r2) # define muli_d(r0,r1,i0) _muli_d(_jit,r0,r1,i0) static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); @@ -420,6 +426,7 @@ _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) fopi(add) fopi(sub) +fopi(rsb) fopi(mul) fopi(div) @@ -636,6 +643,7 @@ fbopi(ltgt) dopi(add) dopi(sub) +dopi(rsb) dopi(mul) dopi(div) diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c index 98a30f3..bfb0960 100644 --- a/lib/jit_aarch64-sz.c +++ b/lib/jit_aarch64-sz.c @@ -23,6 +23,7 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ + 20, /* rsbi */ 4, /* mulr */ 12, /* muli */ 12, /* qmulr */ @@ -178,6 +179,7 @@ 12, /* addi_f */ 4, /* subr_f */ 12, /* subi_f */ + 12, /* rsbi_f */ 4, /* mulr_f */ 12, /* muli_f */ 4, /* divr_f */ @@ -260,6 +262,7 @@ 12, /* addi_d */ 4, /* subr_d */ 12, /* subi_d */ + 12, /* rsbi_d */ 4, /* mulr_d */ 12, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index 6de141c..52d5c4f 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -744,6 +744,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -913,6 +914,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add); case_rrr(sub, _f); case_rrf(sub); + case_rrf(rsb); case_rrr(mul, _f); case_rrf(mul); case_rrr(div, _f); @@ -995,6 +997,7 @@ _emit_code(jit_state_t *_jit) case_rrd(add); case_rrr(sub, _d); case_rrd(sub); + case_rrd(rsb); case_rrr(mul, _d); case_rrd(mul); case_rrr(div, _d); diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index 8787b0f..03513fc 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -335,6 +335,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t); # define mulr(r0,r1,r2) MULQ(r1,r2,r0) # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -955,6 +957,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_alpha-fpu.c b/lib/jit_alpha-fpu.c index 026fd41..e6f92cf 100644 --- a/lib/jit_alpha-fpu.c +++ b/lib/jit_alpha-fpu.c @@ -340,6 +340,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); static void _subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define rsbr_f(r0, r1, r2) subr_f(r0, r2, r1) +# define rsbi_f(r0, r1, i0) _rsbi_f(_jit, r0, r1, i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) +# define rsbi_d(r0, r1, i0) _rsbi_d(_jit, r0, r1, i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define mulr_f(r0,r1,r2) _mulr_f(_jit,r0,r1,r2) static void _mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) @@ -726,6 +732,7 @@ _subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) TRAPB(); } fopi(sub) +fopi(rsb) static void _subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) @@ -734,6 +741,7 @@ _subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) TRAPB(); } dopi(sub) +dopi(rsb) static void _mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c index a5c9ad2..5bcd24e 100644 --- a/lib/jit_alpha-sz.c +++ b/lib/jit_alpha-sz.c @@ -23,6 +23,7 @@ 40, /* subci */ 28, /* subxr */ 28, /* subxi */ + 36, /* rsbi */ 4, /* mulr */ 32, /* muli */ 44, /* qmulr */ @@ -178,6 +179,7 @@ 32, /* addi_f */ 8, /* subr_f */ 32, /* subi_f */ + 32, /* rsbi_f */ 8, /* mulr_f */ 32, /* muli_f */ 8, /* divr_f */ @@ -260,6 +262,7 @@ 28, /* addi_d */ 8, /* subr_d */ 28, /* subi_d */ + 28, /* rsbi_d */ 8, /* mulr_d */ 28, /* muli_d */ 8, /* divr_d */ diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index 6d78c70..6fbdf6d 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -771,6 +771,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -940,6 +941,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -1022,6 +1024,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index 3f7db05..552bce3 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -868,6 +868,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -1949,6 +1951,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t reg; diff --git a/lib/jit_arm-swf.c b/lib/jit_arm-swf.c index f07d65d..8afd9e5 100644 --- a/lib/jit_arm-swf.c +++ b/lib/jit_arm-swf.c @@ -152,6 +152,12 @@ static void _swf_negr_d(jit_state_t*,jit_int32_t,jit_int32_t); # define swf_subi_f(r0,r1,i0) swf_fff_(__aeabi_fsub,r0,r1,i0) # define swf_subr_d(r0,r1,r2) swf_ddd(__aeabi_dsub,r0,r1,r2) # define swf_subi_d(r0,r1,i0) swf_ddd_(__aeabi_dsub,r0,r1,i0) +# define swf_rsbr_f(r0, r1, r2) swf_subr_f(r0, r2, r1) +# define swf_rsbi_f(r0, r1, i0) _swf_rsbi_f(_jit, r0, r1, i0) +static void _swf_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define swf_rsbr_d(r0, r1, r2) swf_subr_d(r0, r2, r1) +# define swf_rsbi_d(r0, r1, i0) _swf_rsbi_d(_jit, r0, r1, i0) +static void _swf_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define swf_mulr_f(r0,r1,r2) swf_fff(__aeabi_fmul,r0,r1,r2) # define swf_muli_f(r0,r1,i0) swf_fff_(__aeabi_fmul,r0,r1,i0) # define swf_mulr_d(r0,r1,r2) swf_ddd(__aeabi_dmul,r0,r1,r2) @@ -691,6 +697,28 @@ _swf_fff_(jit_state_t *_jit, float (*i0)(float, float), } static void +_swf_rsbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_get_reg_args(); + data.f = i0; + movi(_R0_REGNO, data.i); + if (jit_fpr_p(r1)) + swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8); + else + movr(_R1_REGNO, r1); + swf_call(__aeabi_fsub, fallback, _R3_REGNO); + if (jit_fpr_p(r0)) + swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); + else + movr(r0, _R0_REGNO); + jit_unget_reg_args(); +} + +static void _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double), jit_int32_t r0, jit_int32_t r1, jit_float64_t i1) { @@ -699,6 +727,7 @@ _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double), jit_float64_t d; } data; jit_get_reg_args(); + data.d = i1; if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) @@ -731,6 +760,45 @@ _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double), } static void +_swf_rsbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) +{ + union { + jit_int32_t i[2]; + jit_float64_t d; + } data; + jit_get_reg_args(); + data.d = i0; + movi(_R0_REGNO, data.i[0]); + movi(_R1_REGNO, data.i[1]); + if (jit_fpr_p(r1)) { + if (!jit_thumb_p() && jit_armv5e_p()) + LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8); + else { + swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8); + swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4); + } + } + else { + movr(_R2_REGNO, r1); + movr(_R3_REGNO, r1 + 1); + } + swf_call_with_get_reg(__aeabi_dsub, fallback); + if (jit_fpr_p(r0)) { + if (!jit_thumb_p() && jit_armv5e_p()) + STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); + else { + swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); + swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4); + } + } + else { + movr(r0, _R0_REGNO); + movr(r0 + 1, _R1_REGNO); + } + jit_unget_reg_args(); +} + +static void _swf_iff(jit_state_t *_jit, int (*i0)(float, float), jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c index 988cc0a..5d45028 100644 --- a/lib/jit_arm-sz.c +++ b/lib/jit_arm-sz.c @@ -24,6 +24,7 @@ 8, /* subci */ 4, /* subxr */ 4, /* subxi */ + 12, /* rsbi */ 4, /* mulr */ 8, /* muli */ 4, /* qmulr */ @@ -179,6 +180,7 @@ 8, /* addi_f */ 4, /* subr_f */ 8, /* subi_f */ + 8, /* rsbi_f */ 4, /* mulr_f */ 8, /* muli_f */ 4, /* divr_f */ @@ -261,6 +263,7 @@ 20, /* addi_d */ 4, /* subr_d */ 20, /* subi_d */ + 20, /* rsbi_d */ 4, /* mulr_d */ 20, /* muli_d */ 4, /* divr_d */ @@ -377,6 +380,7 @@ 8, /* subci */ 4, /* subxr */ 4, /* subxi */ + 12, /* rsbi */ 4, /* mulr */ 8, /* muli */ 4, /* qmulr */ @@ -532,6 +536,7 @@ 38, /* addi_f */ 38, /* subr_f */ 38, /* subi_f */ + 38, /* rsbi_f */ 38, /* mulr_f */ 38, /* muli_f */ 38, /* divr_f */ @@ -614,6 +619,7 @@ 52, /* addi_d */ 50, /* subr_d */ 52, /* subi_d */ + 52, /* rsbi_d */ 50, /* mulr_d */ 52, /* muli_d */ 50, /* divr_d */ diff --git a/lib/jit_arm-vfp.c b/lib/jit_arm-vfp.c index 9b61dda..27e0ccf 100644 --- a/lib/jit_arm-vfp.c +++ b/lib/jit_arm-vfp.c @@ -504,6 +504,12 @@ static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); # define vfp_subr_d(r0,r1,r2) VSUB_F64(r0,r1,r2) # define vfp_subi_d(r0,r1,i0) _vfp_subi_d(_jit,r0,r1,i0) static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define vfp_rsbr_f(r0,r1,r2) vfp_subr_f(r0,r2,r1) +# define vfp_rsbi_f(r0,r1,i0) _vfp_rsbi_f(_jit,r0,r1,i0) +static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define vfp_rsbr_d(r0,r1,r2) vfp_subr_d(r0,r2,r1) +# define vfp_rsbi_d(r0,r1,i0) _vfp_rsbi_d(_jit,r0,r1,i0) +static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define vfp_mulr_f(r0,r1,r2) VMUL_F32(r0,r1,r2) # define vfp_muli_f(r0,r1,i0) _vfp_muli_f(_jit,r0,r1,i0) static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); @@ -1455,6 +1461,8 @@ _vfp_b##name##i_d(jit_state_t *_jit, \ fopi(add) dopi(add) fopi(sub) +fopi(rsb) +dopi(rsb) dopi(sub) fopi(mul) dopi(mul) diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 179bf68..ed086dd 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1074,6 +1074,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -1227,6 +1228,7 @@ _emit_code(jit_state_t *_jit) case_vvf(add); case_vvv(sub, _f); case_vvf(sub); + case_vvf(rsb); case_vvv(mul, _f); case_vvf(mul); case_vvv(div, _f); @@ -1312,6 +1314,7 @@ _emit_code(jit_state_t *_jit) case_vvd(add); case_vvv(sub, _d); case_vvd(sub); + case_vvd(rsb); case_vvv(mul, _d); case_vvd(mul); case_vvv(div, _d); diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index 3fec623..59008cb 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -677,6 +677,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define subxr(r0,r1,r2) SUB_B(r1,r2,r0) #define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -1629,7 +1631,7 @@ static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (i0 >= -2048 && i0 <= 2047) + if (i0 >= -1024 && i0 <= 1023) ADDI(i0, r1, r0); else { reg = jit_get_reg(jit_class_gpr); @@ -1690,6 +1692,20 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (i0 >= -1024 && i0 <= 1023) + SUBI(i0, r1, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + subr(r0, rn(reg), r1); + jit_unget_reg(reg); + } +} + +static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t t0, t1; diff --git a/lib/jit_hppa-fpu.c b/lib/jit_hppa-fpu.c index d1b6efe..6b6b8bf 100644 --- a/lib/jit_hppa-fpu.c +++ b/lib/jit_hppa-fpu.c @@ -301,6 +301,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); #define subr_d(r0,r1,r2) FSUB_D(r1,r2,r0) #define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +#define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +#define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +#define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +#define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); #define mulr_f(r0,r1,r2) FMPY_S(r1,r2,r0) #define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -741,6 +747,8 @@ fopi(add) dopi(add) fopi(sub) dopi(sub) +fopi(rsb) +dopi(rsb) fopi(mul) dopi(mul) fopi(div) diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c index 2c6916f..663162b 100644 --- a/lib/jit_hppa-sz.c +++ b/lib/jit_hppa-sz.c @@ -23,6 +23,7 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ + 16, /* rsbi */ 28, /* mulr */ 36, /* muli */ 40, /* qmulr */ @@ -178,6 +179,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -260,6 +262,7 @@ 24, /* addi_d */ 4, /* subr_d */ 24, /* subi_d */ + 24, /* rsbi_d */ 4, /* mulr_d */ 24, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index 636f364..bc277bc 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -749,6 +749,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -924,8 +925,10 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(mul, _d); diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index bf70141..9baeafa 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -1197,6 +1197,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -3622,6 +3624,22 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (i0 >= -128 && i0 <= 127) + SUBI(r0, i0, r1); + else if (!(r1 & ~3) && i0 >= -2097151 && i0 <= 2097152) + ADDL(r1, -i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + subr(r0, rn(reg), r1); + jit_unget_reg(reg); + } +} + +static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t f0, f1; diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c index 5557d7b..1c5057c 100644 --- a/lib/jit_ia64-fpu.c +++ b/lib/jit_ia64-fpu.c @@ -474,6 +474,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); #define subr_d(r0,r1,r2) FSUB_D(r0,r1,r2) #define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +#define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +#define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +#define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +#define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); #define mulr_f(r0,r1,r2) FMPY_S(r0,r1,r2) #define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -1109,10 +1115,12 @@ _b##name##i_##type(jit_state_t *_jit, \ fopi(add) fopi(sub) +fopi(rsb) fopi(mul) fopi(div) dopi(add) dopi(sub) +dopi(rsb) dopi(mul) dopi(div) diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c index 4711f61..8059d8c 100644 --- a/lib/jit_ia64-sz.c +++ b/lib/jit_ia64-sz.c @@ -23,6 +23,7 @@ 48, /* subci */ 64, /* subxr */ 64, /* subxi */ + 48, /* rsbi */ 32, /* mulr */ 48, /* muli */ 96, /* qmulr */ @@ -178,6 +179,7 @@ 32, /* addi_f */ 16, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 0, /* mulr_f */ 16, /* muli_f */ 144, /* divr_f */ @@ -260,6 +262,7 @@ 32, /* addi_d */ 16, /* subr_d */ 16, /* subi_d */ + 16, /* rsbi_d */ 0, /* mulr_d */ 16, /* muli_d */ 128, /* divr_d */ diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index e8eee8a..f35f396 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -884,6 +884,7 @@ _emit_code(jit_state_t *_jit) case_rrw(sub,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(subc,); case_rrw(subc,); case_rrr(mul,); @@ -1055,6 +1056,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -1137,6 +1139,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 9e9778f..1f48ecf 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -419,6 +419,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -960,6 +962,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { multu(r1, r2); diff --git a/lib/jit_mips-fpu.c b/lib/jit_mips-fpu.c index bd83103..ff31609 100644 --- a/lib/jit_mips-fpu.c +++ b/lib/jit_mips-fpu.c @@ -193,6 +193,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); # define subr_d(r0,r1,r2) SUB_D(r0,r1,r2) # define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +# define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +# define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define mulr_f(r0,r1,r2) MUL_S(r0,r1,r2) # define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -585,6 +591,7 @@ _b##name##i_##type(jit_state_t *_jit, \ fopi(add) fopi(sub) +fopi(rsb) fopi(mul) fopi(div) @@ -746,6 +753,7 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) dopi(add) dopi(sub) +dopi(rsb) dopi(mul) dopi(div) diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index 2efe8f8..517bedc 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -24,6 +24,7 @@ 20, /* subci */ 28, /* subxr */ 28, /* subxi */ + 16, /* rsbi */ 8, /* mulr */ 16, /* muli */ 12, /* qmulr */ @@ -179,6 +180,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -261,6 +263,7 @@ 16, /* addi_d */ 4, /* subr_d */ 16, /* subi_d */ + 16, /* rsbi_d */ 4, /* mulr_d */ 16, /* muli_d */ 4, /* divr_d */ @@ -377,6 +380,7 @@ 20, /* subci */ 28, /* subxr */ 28, /* subxi */ + 16, /* rsbi */ 8, /* mulr */ 16, /* muli */ 12, /* qmulr */ @@ -532,6 +536,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -614,6 +619,7 @@ 20, /* addi_d */ 4, /* subr_d */ 20, /* subi_d */ + 20, /* rsbi_d */ 4, /* mulr_d */ 20, /* muli_d */ 4, /* divr_d */ @@ -729,6 +735,7 @@ 36, /* subci */ 28, /* subxr */ 28, /* subxi */ + 32, /* rsbi */ 8, /* mulr */ 32, /* muli */ 12, /* qmulr */ @@ -884,6 +891,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -966,6 +974,7 @@ 16, /* addi_d */ 4, /* subr_d */ 16, /* subi_d */ + 16, /* rsbi_d */ 4, /* mulr_d */ 16, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_mips.c b/lib/jit_mips.c index c31d43b..aed730c 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -1005,6 +1005,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -1186,6 +1187,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -1268,6 +1270,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_names.c b/lib/jit_names.c index 171fbfc..6af4433 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -31,6 +31,7 @@ static char *code_name[] = { "subr", "subi", "subcr", "subci", "subxr", "subxi", + "rsbi", "mulr", "muli", "qmulr", "qmuli", "qmulr_u", "qmuli_u", diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 844c8b1..2f65d36 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -447,10 +447,10 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define TWI(t,a,s) FDs(3,t,a,s) # define TWGTI(a,s) TWI(8,a,s) # define TWLLEI(a,s) TWI(6,a,s) -# define XOR(d,a,b) FXO(31,a,d,b,0,316) -# define XOR_(d,a,b) FXO_(31,a,d,b,0,316) -# define XORI(s,a,u) FDu(26,s,a,u) -# define XORIS(s,a,u) FDu(27,s,a,u) +# define XOR(d,a,b) FX(31,a,d,b,316) +# define XOR_(d,a,b) FX_(31,a,d,b,316) +# define XORI(s,a,u) FDu(26,a,s,u) +# define XORIS(s,a,u) FDu(27,a,s,u) # define nop(c) _nop(_jit,c) static void _nop(jit_state_t*,jit_int32_t); # define movr(r0,r1) _movr(_jit,r0,r1) @@ -492,6 +492,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define subxr(r0,r1,r2) SUBFE(r0,r2,r1) # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # if __WORDSIZE == 32 # define mulr(r0,r1,r2) MULLW(r0,r1,r2) # define mullr(r0,r1,r2) MULLW(r0,r1,r2) @@ -1120,6 +1122,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c index fcfca82..4bef6c7 100644 --- a/lib/jit_ppc-fpu.c +++ b/lib/jit_ppc-fpu.c @@ -24,7 +24,6 @@ static void _FA(jit_state_t*,int,int,int,int,int,int,int); #define FXFL(o,m,b,x) _FXFL(_jit,o,m,b,x,0) #define FXFL_(o,m,b,x) _FXFL(_jit,o,m,b,x,1) static void _FXFL(jit_state_t*,int,int,int,int,int) maybe_unused; - # define FABS(d,b) FX(63,d,0,b,264) # define FABS_(d,b) FX_(63,d,0,b,264) # define FADD(d,a,b) FA(63,d,a,b,0,21) @@ -116,7 +115,6 @@ static void _FXFL(jit_state_t*,int,int,int,int,int) maybe_unused; # define STFSU(s,a,d) FDs(53,s,a,d) # define STFSUX(s,a,b) FX(31,s,a,b,695) # define STFSX(s,a,b) FX(31,s,a,b,663) - # define movr_f(r0,r1) movr_d(r0,r1) # define movr_d(r0,r1) _movr_d(_jit,r0,r1) static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t); @@ -127,7 +125,6 @@ static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*); # define extr_f(r0,r1) extr_d(r0,r1) # define extr_d(r0,r1) _extr_d(_jit,r0,r1) static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t); - # define truncr_f(r0,r1) truncr_d(r0,r1) # define truncr_f_i(r0,r1) truncr_d_i(r0,r1) # define truncr_d_i(r0,r1) _truncr_d_i(_jit,r0,r1) @@ -140,17 +137,14 @@ static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t); # define truncr_d_l(r0,r1) _truncr_d_l(_jit,r0,r1) static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t); # endif - # define extr_d_f(r0,r1) FRSP(r0,r1) # define extr_f_d(r0,r1) movr_d(r0,r1) - # define absr_f(r0,r1) absr_d(r0,r1) # define absr_d(r0,r1) FABS(r0,r1) # define negr_f(r0,r1) negr_d(r0,r1) # define negr_d(r0,r1) FNEG(r0,r1) # define sqrtr_f(r0,r1) FSQRTS(r0,r1) # define sqrtr_d(r0,r1) FSQRT(r0,r1) - # define addr_f(r0,r1,r2) FADDS(r0,r1,r2) # define addr_d(r0,r1,r2) FADD(r0,r1,r2) # define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) @@ -163,6 +157,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); # define subr_d(r0,r1,r2) FSUB(r0,r1,r2) # define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +# define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +# define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define mulr_f(r0,r1,r2) FMULS(r0,r1,r2) # define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -175,7 +175,6 @@ static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); # define divr_d(r0,r1,r2) FDIV(r0,r1,r2) # define divi_d(r0,r1,i0) _divi_d(_jit,r0,r1,i0) static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); - # define ltr_f(r0,r1,r2) ltr_d(r0,r1,r2) # define ltr_d(r0,r1,r2) _ltr_d(_jit,r0,r1,r2) static void _ltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); @@ -372,7 +371,6 @@ static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*); # define bunordi_d(i0,r0,i1) _bunordi_d(_jit,i0,r0,i1) static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*); - # define ldr_f(r0,r1) LFSX(r0, _R0_REGNO, r1) # define ldi_f(r0,i0) _ldi_f(_jit,r0,i0) static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); @@ -565,6 +563,8 @@ fopi(add) dopi(add) fopi(sub) dopi(sub) +fopi(rsb) +dopi(rsb) fopi(mul) dopi(mul) fopi(div) diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index 7fae394..ba78813 100644 --- a/lib/jit_ppc-sz.c +++ b/lib/jit_ppc-sz.c @@ -24,6 +24,7 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ + 16, /* rsbi */ 4, /* mulr */ 12, /* muli */ 12, /* qmulr */ @@ -179,6 +180,7 @@ 12, /* addi_f */ 4, /* subr_f */ 12, /* subi_f */ + 12, /* rsbi_f */ 4, /* mulr_f */ 12, /* muli_f */ 4, /* divr_f */ @@ -261,6 +263,7 @@ 12, /* addi_d */ 4, /* subr_d */ 12, /* subi_d */ + 12, /* rsbi_d */ 4, /* mulr_d */ 12, /* muli_d */ 4, /* divr_d */ @@ -377,6 +380,7 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ + 16, /* rsbi */ 4, /* mulr */ 12, /* muli */ 12, /* qmulr */ @@ -532,6 +536,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -614,6 +619,7 @@ 24, /* addi_d */ 4, /* subr_d */ 24, /* subi_d */ + 24, /* rsbi_d */ 4, /* mulr_d */ 24, /* muli_d */ 4, /* divr_d */ @@ -730,6 +736,7 @@ 28, /* subci */ 4, /* subxr */ 8, /* subxi */ + 32, /* rsbi */ 4, /* mulr */ 28, /* muli */ 12, /* qmulr */ @@ -885,6 +892,7 @@ 28, /* addi_f */ 4, /* subr_f */ 28, /* subi_f */ + 28, /* rsbi_f */ 4, /* mulr_f */ 28, /* muli_f */ 4, /* divr_f */ @@ -967,6 +975,7 @@ 28, /* addi_d */ 4, /* subr_d */ 28, /* subi_d */ + 28, /* rsbi_d */ 4, /* mulr_d */ 28, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 88488f0..cbc5dda 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -902,6 +902,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -1089,6 +1090,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -1171,6 +1173,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_s390x-cpu.c b/lib/jit_s390x-cpu.c index dfa4e56..0030bac 100644 --- a/lib/jit_s390x-cpu.c +++ b/lib/jit_s390x-cpu.c @@ -934,6 +934,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -2447,6 +2449,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { if (r0 == r2) diff --git a/lib/jit_s390x-fpu.c b/lib/jit_s390x-fpu.c index 25c2b6f..d36f0fb 100644 --- a/lib/jit_s390x-fpu.c +++ b/lib/jit_s390x-fpu.c @@ -372,6 +372,10 @@ static void _subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subr_d(r0,r1,r2) _subr_d(_jit,r0,r1,r2) static void _subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subi_d(r0,r1,i0) dp(sub,r0,r1,i0) +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +# define rsbi_f(r0,r1,i0) fp(rsb,r0,r1,i0) +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +# define rsbi_d(r0,r1,i0) dp(rsb,r0,r1,i0) # define mulr_f(r0,r1,r2) _mulr_f(_jit,r0,r1,r2) static void _mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli_f(r0,r1,i0) fp(mul,r0,r1,i0) @@ -597,6 +601,7 @@ _fp(jit_state_t *_jit, jit_code_t code, switch (code) { case jit_code_addi_f: addr_f(r0, r1, rn(reg)); break; case jit_code_subi_f: subr_f(r0, r1, rn(reg)); break; + case jit_code_rsbi_f: rsbr_f(r0, r1, rn(reg)); break; case jit_code_muli_f: mulr_f(r0, r1, rn(reg)); break; case jit_code_divi_f: divr_f(r0, r1, rn(reg)); break; case jit_code_uneqi_f: uneqr_f(r0, r1, rn(reg)); break; @@ -616,6 +621,7 @@ _dp(jit_state_t *_jit, jit_code_t code, switch (code) { case jit_code_addi_d: addr_d(r0, r1, rn(reg)); break; case jit_code_subi_d: subr_d(r0, r1, rn(reg)); break; + case jit_code_rsbi_d: rsbr_d(r0, r1, rn(reg)); break; case jit_code_muli_d: mulr_d(r0, r1, rn(reg)); break; case jit_code_divi_d: divr_d(r0, r1, rn(reg)); break; case jit_code_uneqi_d: uneqr_d(r0, r1, rn(reg)); break; diff --git a/lib/jit_s390x-sz.c b/lib/jit_s390x-sz.c index a80b973..a933e15 100644 --- a/lib/jit_s390x-sz.c +++ b/lib/jit_s390x-sz.c @@ -23,6 +23,7 @@ 20, /* subci */ 12, /* subxr */ 12, /* subxi */ + 28, /* rsbi */ 8, /* mulr */ 24, /* muli */ 60, /* qmulr */ @@ -178,6 +179,7 @@ 26, /* addi_f */ 8, /* subr_f */ 26, /* subi_f */ + 26, /* rsbi_f */ 6, /* mulr_f */ 26, /* muli_f */ 8, /* divr_f */ @@ -260,6 +262,7 @@ 26, /* addi_d */ 8, /* subr_d */ 26, /* subi_d */ + 26, /* rsbi_d */ 6, /* mulr_d */ 26, /* muli_d */ 8, /* divr_d */ diff --git a/lib/jit_s390x.c b/lib/jit_s390x.c index 4c45db0..460d793 100644 --- a/lib/jit_s390x.c +++ b/lib/jit_s390x.c @@ -727,6 +727,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -896,6 +897,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add); case_rrr(sub, _f); case_rrf(sub); + case_rrf(rsb); case_rrr(mul, _f); case_rrf(mul); case_rrr(div, _f); @@ -978,6 +980,7 @@ _emit_code(jit_state_t *_jit) case_rrd(add); case_rrr(sub, _d); case_rrd(sub); + case_rrd(rsb); case_rrr(mul, _d); case_rrd(mul); case_rrr(div, _d); diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 095c364..96150d3 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -452,6 +452,8 @@ static void _subci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define subxr(r0, r1, r2) SUBXcc(r1, r2, r0) # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) static void _subxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0, r1, r2) UMUL(r1, r2, r0) # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); @@ -928,6 +930,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; @@ -1325,7 +1334,7 @@ _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - ldxr_c(r0, r1, rn(reg)); + ldxr_s(r0, r1, rn(reg)); jit_unget_reg(reg); } } diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c index 52d760d..dc4d574 100644 --- a/lib/jit_sparc-fpu.c +++ b/lib/jit_sparc-fpu.c @@ -143,7 +143,6 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); # define FCMPEQ(rs1, rs2) FPop2(0, rs1, SPARC_FCMPEQ, rs2) # define CPop1(rd, rs1, opc, rs2) f3f(rd, 54, rs1, opf, rs2) # define CPop2(rd, rs1, opc, rs2) f3f(rd, 55, rs1, opf, rs2) - # define extr_f(r0, r1) _extr_f(_jit, r0, r1) static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t); # define truncr_f(r0, r1) truncr_f_i(r0, r1) @@ -156,7 +155,6 @@ static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*); # define negr_f(r0, r1) FNEGS(r1, r0) # define absr_f(r0, r1) FABSS(r1, r0) # define sqrtr_f(r0, r1) FSQRTS(r1, r0) - # define extr_d(r0, r1) _extr_d(_jit, r0, r1) static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define truncr_d(r0, r1) truncr_d_i(r0, r1) @@ -172,32 +170,40 @@ static void _negr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define absr_d(r0, r1) _absr_d(_jit, r0, r1) static void _absr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define sqrtr_d(r0, r1) FSQRTD(r1, r0) - # define fop1f(op, r0, r1, i0) _fop1f(_jit, op, r0, r1, i0) static void _fop1f(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_float32_t*); +# define rfop1f(op, r0, r1, i0) _rfop1f(_jit, op, r0, r1, i0) +static void _rfop1f(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_float32_t*); # define fop1d(op, r0, r1, i0) _fop1d(_jit, op, r0, r1, i0) static void _fop1d(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_float64_t*); - +# define rfop1d(op, r0, r1, i0) _rfop1d(_jit, op, r0, r1, i0) +static void _rfop1d(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_float64_t*); # define addr_f(r0, r1, r2) FADDS(r1, r2, r0) # define addi_f(r0, r1, i0) fop1f(SPARC_FADDS, r0, r1, i0) # define subr_f(r0, r1, r2) FSUBS(r1, r2, r0) # define subi_f(r0, r1, i0) fop1f(SPARC_FSUBS, r0, r1, i0) +# define rsbr_f(r0, r1, r2) subr_f(r0, r2, r1) +# define rsbi_f(r0, r1, i0) rfop1f(SPARC_FSUBS, r0, r1, i0) +# define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) +# define rsbi_d(r0, r1, i0) rfop1d(SPARC_FSUBD, r0, r1, i0) # define mulr_f(r0, r1, r2) FMULS(r1, r2, r0) # define muli_f(r0, r1, i0) fop1f(SPARC_FMULS, r0, r1, i0) # define divr_f(r0, r1, r2) FDIVS(r1, r2, r0) # define divi_f(r0, r1, i0) fop1f(SPARC_FDIVS, r0, r1, i0) - # define addr_d(r0, r1, r2) FADDD(r1, r2, r0) # define addi_d(r0, r1, i0) fop1d(SPARC_FADDD, r0, r1, i0) # define subr_d(r0, r1, r2) FSUBD(r1, r2, r0) # define subi_d(r0, r1, i0) fop1d(SPARC_FSUBD, r0, r1, i0) +# define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) +# define rsbi_d(r0, r1, i0) rfop1d(SPARC_FSUBD, r0, r1, i0) # define mulr_d(r0, r1, r2) FMULD(r1, r2, r0) # define muli_d(r0, r1, i0) fop1d(SPARC_FMULD, r0, r1, i0) # define divr_d(r0, r1, r2) FDIVD(r1, r2, r0) # define divi_d(r0, r1, i0) fop1d(SPARC_FDIVD, r0, r1, i0) - #define fcr(cc, r0, r1, r2) _fcr(_jit, cc, r0, r1, r2) static void _fcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define fcw(cc, r0, r1, i0) _fcw(_jit, cc, r0, r1, i0) @@ -231,7 +237,6 @@ _fcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float32_t*); # define ordi_f(r0, r1, i0) fcw(SPARC_FBO, r0, r1, i0) # define unordr_f(r0, r1, r2) fcr(SPARC_FBU, r0, r1, r2) # define unordi_f(r0, r1, i0) fcw(SPARC_FBU, r0, r1, i0) - #define dcr(cc, r0, r1, r2) _dcr(_jit, cc, r0, r1, r2) static void _dcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define dcw(cc, r0, r1, i0) _dcw(_jit, cc, r0, r1, i0) @@ -265,7 +270,6 @@ _dcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t*); # define ordi_d(r0, r1, i0) dcw(SPARC_FBO, r0, r1, i0) # define unordr_d(r0, r1, r2) dcr(SPARC_FBU, r0, r1, r2) # define unordi_d(r0, r1, i0) dcw(SPARC_FBU, r0, r1, i0) - # define ldr_f(r0, r1) LDF(r1, 0, r0) # define ldi_f(r0, i0) _ldi_f(_jit, r0, i0) static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); @@ -278,7 +282,6 @@ static void _sti_f(jit_state_t*,jit_int32_t,jit_word_t); # define stxr_f(r0, r1, r2) STF(r2, r1, r0) # define stxi_f(r0, r1, i0) _stxi_f(_jit, r0, r1, i0) static void _stxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); - # define ldr_d(r0, r1) LDDF(r1, 0, r0) # define ldi_d(r0, i0) _ldi_d(_jit, r0, i0) static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t); @@ -291,7 +294,6 @@ static void _sti_d(jit_state_t*,jit_int32_t,jit_word_t); # define stxr_d(r0, r1, r2) STDF(r2, r1, r0) # define stxi_d(r0, r1, i0) _stxi_d(_jit, r0, r1, i0) static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); - # define fbr(cc, i0, r0, r1) _fbr(_jit, cc, i0, r0, r1) static jit_word_t _fbr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); @@ -360,7 +362,6 @@ _dbw(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float64_t*); # define bordi_d(i0, r0, i1) dbw(SPARC_FBO, i0, r0, i1) # define bunordr_d(i0, r0, r1) dbr(SPARC_FBU, i0, r0, r1) # define bunordi_d(i0, r0, i1) dbw(SPARC_FBU, i0, r0, i1) - #endif #if CODE @@ -470,6 +471,17 @@ _fop1f(jit_state_t *_jit, jit_int32_t op, } static void +_rfop1f(jit_state_t *_jit, jit_int32_t op, + jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_f(rn(reg), i0); + FPop1(r0, rn(reg), op, r1); + jit_unget_reg(reg); +} + +static void _fop1d(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) { @@ -481,6 +493,17 @@ _fop1d(jit_state_t *_jit, jit_int32_t op, } static void +_rfop1d(jit_state_t *_jit, jit_int32_t op, + jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_d(rn(reg), i0); + FPop1(r0, rn(reg), op, r1); + jit_unget_reg(reg); +} + +static void _extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { stxi(-8, _FP_REGNO, r1); diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index df52ca3..6acfd09 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -23,6 +23,7 @@ 12, /* subci */ 4, /* subxr */ 4, /* subxi */ + 16, /* rsbi */ 4, /* mulr */ 12, /* muli */ 8, /* qmulr */ @@ -178,6 +179,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -260,6 +262,7 @@ 24, /* addi_d */ 4, /* subr_d */ 24, /* subi_d */ + 24, /* rsbi_d */ 4, /* mulr_d */ 24, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index a8488e2..dc1557c 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -745,6 +745,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -898,6 +899,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -980,6 +982,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 0a5d330..2e284f9 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -202,6 +202,8 @@ static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define isubxi(r0, i0) alui(X86_SBB, r0, i0) # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define imulr(r0, r1) _imulr(_jit, r0, r1) static void _imulr(jit_state_t*, jit_int32_t, jit_int32_t); # define imuli(r0, r1, i0) _imuli(_jit, r0, r1, i0) @@ -1110,6 +1112,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void _imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { rex(0, 1, r0, _NOREG, r1); diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c index d3bd7ae..6a3d996 100644 --- a/lib/jit_x86-sse.c +++ b/lib/jit_x86-sse.c @@ -131,6 +131,12 @@ static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0) static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1) +# define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0) +static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1) +# define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0) +static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2) static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0) @@ -592,6 +598,10 @@ _sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) dopi(sub) +fopi(rsb) + +dopi(rsb) + static void _sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index 70f8a03..1c73ea3 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -23,6 +23,7 @@ 6, /* subci */ 6, /* subxr */ 5, /* subxi */ + 10, /* rsbi */ 5, /* mulr */ 7, /* muli */ 20, /* qmulr */ @@ -178,6 +179,7 @@ 19, /* addi_f */ 12, /* subr_f */ 19, /* subi_f */ + 19, /* rsbi_f */ 8, /* mulr_f */ 19, /* muli_f */ 12, /* divr_f */ @@ -260,6 +262,7 @@ 26, /* addi_d */ 12, /* subr_d */ 26, /* subi_d */ + 26, /* rsbi_d */ 8, /* mulr_d */ 26, /* muli_d */ 12, /* divr_d */ @@ -375,6 +378,7 @@ 13, /* subci */ 9, /* subxr */ 7, /* subxi */ + 19, /* rsbi */ 7, /* mulr */ 14, /* muli */ 20, /* qmulr */ @@ -530,6 +534,7 @@ 21, /* addi_f */ 15, /* subr_f */ 21, /* subi_f */ + 21, /* rsbi_f */ 10, /* mulr_f */ 21, /* muli_f */ 15, /* divr_f */ @@ -612,6 +617,7 @@ 25, /* addi_d */ 15, /* subr_d */ 25, /* subi_d */ + 25, /* rsbi_d */ 10, /* mulr_d */ 25, /* muli_d */ 15, /* divr_d */ @@ -725,6 +731,7 @@ 13, /* subci */ 9, /* subxr */ 7, /* subxi */ + 19, /* rsbi */ 7, /* mulr */ 14, /* muli */ 20, /* qmulr */ @@ -880,6 +887,7 @@ 20, /* addi_f */ 15, /* subr_f */ 20, /* subi_f */ + 20, /* rsbi_f */ 10, /* mulr_f */ 20, /* muli_f */ 15, /* divr_f */ @@ -962,6 +970,7 @@ 25, /* addi_d */ 15, /* subr_d */ 25, /* subi_d */ + 25, /* rsbi_d */ 10, /* mulr_d */ 25, /* muli_d */ 15, /* divr_d */ diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c index 8faa140..2677d43 100644 --- a/lib/jit_x86-x87.c +++ b/lib/jit_x86-x87.c @@ -83,6 +83,12 @@ static void _x87_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); static void _x87_subr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define x87_subi_d(r0, r1, i0) _x87_subi_d(_jit, r0, r1, i0) static void _x87_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define x87_rsbr_f(r0, r1, r2) x87_subr_f(r0, r2, r1) +# define x87_rsbi_f(r0, r1, i0) _x87_rsbi_f(_jit, r0, r1, i0) +static void _x87_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define x87_rsbr_d(r0, r1, r2) x87_subr_d(r0, r2, r1) +# define x87_rsbi_d(r0, r1, i0) _x87_rsbi_d(_jit, r0, r1, i0) +static void _x87_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define x87_mulr_f(r0, r1, r2) _x87_mulr_d(_jit, r0, r1, r2) # define x87_muli_f(r0, r1, i0) _x87_muli_f(_jit, r0, r1, i0) static void _x87_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -446,6 +452,7 @@ _x87rri(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1) fopi(add) fopi(sub) +fopi(rsb) fopi(mul) fopi(div) @@ -517,6 +524,8 @@ _x87_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) dopi(sub) +dopi(rsb) + static void _x87_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 20dcc21..6f55ebd 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -1297,6 +1297,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subx,); case_rrr(subc,); case_rrw(subc,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -1478,6 +1479,7 @@ _emit_code(jit_state_t *_jit) case_ffw(add, _f, 32); case_fff(sub, _f); case_ffw(sub, _f, 32); + case_ffw(rsb, _f, 32); case_fff(mul, _f); case_ffw(mul, _f, 32); case_fff(div, _f); @@ -1576,6 +1578,7 @@ _emit_code(jit_state_t *_jit) case_ffw(add, _d, 64); case_fff(sub, _d); case_ffw(sub, _d, 64); + case_ffw(rsb, _d, 64); case_fff(mul, _d); case_ffw(mul, _d, 64); case_fff(div, _d); diff --git a/lib/lightning.c b/lib/lightning.c index 9ee261d..20aa1bd 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1237,6 +1237,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) break; case jit_code_addi: case jit_code_addxi: case jit_code_addci: case jit_code_subi: case jit_code_subxi: case jit_code_subci: + case jit_code_rsbi: case jit_code_muli: case jit_code_divi: case jit_code_divi_u: case jit_code_remi: case jit_code_remi_u: case jit_code_andi: case jit_code_ori: case jit_code_xori: case jit_code_lshi: @@ -1254,20 +1255,22 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg| jit_cc_a1_reg|jit_cc_a2_int; break; - case jit_code_addi_f: case jit_code_subi_f: case jit_code_muli_f: - case jit_code_divi_f: case jit_code_lti_f: case jit_code_lei_f: - case jit_code_eqi_f: case jit_code_gei_f: case jit_code_gti_f: - case jit_code_nei_f: case jit_code_unlti_f: case jit_code_unlei_f: - case jit_code_uneqi_f: case jit_code_ungei_f: case jit_code_ungti_f: - case jit_code_ltgti_f: case jit_code_ordi_f: case jit_code_unordi_f: + case jit_code_addi_f: case jit_code_subi_f: case jit_code_rsbi_f: + case jit_code_muli_f: case jit_code_divi_f: case jit_code_lti_f: + case jit_code_lei_f: case jit_code_eqi_f: case jit_code_gei_f: + case jit_code_gti_f: case jit_code_nei_f: case jit_code_unlti_f: + case jit_code_unlei_f: case jit_code_uneqi_f: case jit_code_ungei_f: + case jit_code_ungti_f: case jit_code_ltgti_f: case jit_code_ordi_f: + case jit_code_unordi_f: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_flt; break; - case jit_code_addi_d: case jit_code_subi_d: case jit_code_muli_d: - case jit_code_divi_d: case jit_code_lti_d: case jit_code_lei_d: - case jit_code_eqi_d: case jit_code_gei_d: case jit_code_gti_d: - case jit_code_nei_d: case jit_code_unlti_d: case jit_code_unlei_d: - case jit_code_uneqi_d: case jit_code_ungei_d: case jit_code_ungti_d: - case jit_code_ltgti_d: case jit_code_ordi_d: case jit_code_unordi_d: + case jit_code_addi_d: case jit_code_subi_d: case jit_code_rsbi_d: + case jit_code_muli_d: case jit_code_divi_d: case jit_code_lti_d: + case jit_code_lei_d: case jit_code_eqi_d: case jit_code_gei_d: + case jit_code_gti_d: case jit_code_nei_d: case jit_code_unlti_d: + case jit_code_unlei_d: case jit_code_uneqi_d: case jit_code_ungei_d: + case jit_code_ungti_d: case jit_code_ltgti_d: case jit_code_ordi_d: + case jit_code_unordi_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl; break; case jit_code_addr: case jit_code_addxr: case jit_code_addcr: |