author | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023-08-21 19:45:10 -0300 |
---|---|---|
committer | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023-08-21 19:45:10 -0300 |
commit | 512f9c3ccd3ec43d95fb235040451558b816cfff (patch) | |
tree | ac9afd08546379908922b86fff6639e85f49e19d | |
parent | 3d72aba731677c1c262692a584bd41b6b12c792e (diff) | |
download | lightning-512f9c3ccd3ec43d95fb235040451558b816cfff.tar.gz |
@@ -1,3 +1,23 @@ +2023-08-21 Paulo Andrade <pcpa@gnu.org> + + * check/Makefile.am, check/lightning.c: Add new hmul tests. + * doc/body.texi: Document hmul. + * include/lightning.h.in: Create the new hmul codes. + * lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c, + lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c, + lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c, + lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, + lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_loongarch-cpu.c, lib/jit_loongarch-sz.c, lib/jit_loongarch.c, + lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c, + lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, + lib/jit_riscv-cpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c, + lib/jit_s390-cpu.c, lib/jit_s390-sz.c, lib/jit_s390.c, + lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c, + lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c: Implement + hmul and update the *-sz.c files. + * lib/jit_names.c, lib/lightning.c: Add knowledge of hmul. 
+ 2023年04月18日 Paulo Andrade <pcpa@gnu.org> * include/lightning.h.in: Define new fmar_f, fmai_f, fmsr_f, diff --git a/check/Makefile.am b/check/Makefile.am index 7142340..1f086ef 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -106,6 +106,7 @@ EXTRA_DIST = \ alux_sub.tst alux_sub.ok \ alu_rsb.tst alu_rsb.ok \ alu_mul.tst alu_mul.ok \ + alu_hmul.tst alu_hmul.ok \ alu_div.tst alu_div.ok \ alu_rem.tst alu_rem.ok \ alu_and.tst alu_and.ok \ @@ -164,7 +165,8 @@ base_TESTS = \ ext cvt hton bswap branch \ alu_add alux_add \ alu_sub alux_sub alu_rsb \ - alu_mul alu_div alu_rem \ + alu_mul alu_hmul \ + alu_div alu_rem \ alu_and alu_or alu_xor \ alu_lsh alu_rsh \ alu_com alu_neg alu_rot \ @@ -194,7 +196,8 @@ x87_TESTS = \ ext.x87 cvt.x87 branch.x87 \ alu_add.x87 alux_add.x87 \ alu_sub.x87 alux_sub.x87 alu_rsb.x87 \ - alu_mul.x87 alu_div.x87 alu_rem.x87 \ + alu_mul.x87 alu_hmul.x87 \ + alu_div.x87 alu_rem.x87 \ alu_and.x87 alu_or.x87 alu_xor.x87 \ alu_lsh.x87 alu_rsh.x87 alu_rot.x87 \ alu_com.x87 alu_neg.x87 \ @@ -218,7 +221,8 @@ x87_nodata_TESTS = \ ext.x87.nodata cvt.x87.nodata branch.x87.nodata \ alu_add.x87.nodata alux_add.x87.nodata \ alu_sub.x87.nodata alux_sub.x87.nodata alu_rsb.x87.nodata \ - alu_mul.x87.nodata alu_div.x87.nodata alu_rem.x87.nodata \ + alu_mul.x87.nodata alu_hmul.x87.nodata \ + alu_div.x87.nodata alu_rem.x87.nodata \ alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata \ alu_lsh.x87.nodata alu_rsh.x87.nodata alu_rot.x87.nodata \ alu_com.x87.nodata alu_neg.x87.nodata \ @@ -244,7 +248,8 @@ arm_TESTS = \ ext.arm cvt.arm hton.arm bswap.arm \ branch.arm alu_add.arm alux_add.arm \ alu_sub.arm alux_sub.arm alu_rsb.arm \ - alu_mul.arm alu_div.arm alu_rem.arm \ + alu_mul.arm alu_hmul.arm \ + alu_div.arm alu_rem.arm \ alu_and.arm alu_or.arm alu_xor.arm \ alu_lsh.arm alu_rsh.arm alu_rot.arm \ alu_com.arm alu_neg.arm \ @@ -271,7 +276,8 @@ swf_TESTS = \ ext.swf cvt.swf hton.swf bswap.swf \ branch.swf alu_add.swf alux_add.swf \ alu_sub.swf alux_sub.swf 
alu_rsb.swf \ - alu_mul.swf alu_div.swf alu_rem.swf \ + alu_mul.swf alu_hmul.swf \ + alu_div.swf alu_rem.swf \ alu_and.swf alu_or.swf alu_xor.swf \ alu_lsh.swf alu_rsh.swf alu_rot.swf \ alu_com.swf alu_neg.swf \ @@ -351,7 +357,8 @@ nodata_TESTS = \ ext.nodata cvt.nodata branch.nodata \ alu_add.nodata alux_add.nodata \ alu_sub.nodata alux_sub.nodata alu_rsb.nodata \ - alu_mul.nodata alu_div.nodata alu_rem.nodata \ + alu_mul.nodata alu_hmul.nodata \ + alu_div.nodata alu_rem.nodata \ alu_and.nodata alu_or.nodata alu_xor.nodata \ alu_lsh.nodata alu_rsh.nodata alu_rot.nodata \ alu_com.nodata alu_neg.nodata \ diff --git a/check/alu_hmul.ok b/check/alu_hmul.ok new file mode 100644 index 0000000..9766475 --- /dev/null +++ b/check/alu_hmul.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_hmul.tst b/check/alu_hmul.tst new file mode 100644 index 0000000..ef47a4d --- /dev/null +++ b/check/alu_hmul.tst @@ -0,0 +1,31 @@ +#include "alu.inc" + +.code + prolog +#define HMUL(N, I0, I1, V) ALU(N, , hmul, I0, I1, V) +#define UHMUL(N, I0, I1, V) ALU(N, _u, hmul, I0, I1, V) + HMUL(0, -2, -1, 0) + HMUL(1, 0, -1, 0) + HMUL(2, -1, 0, 0) + HMUL(3, 1, -1, -1) +#if __WORDSIZE == 32 + HMUL(4, 0x7ffff, 0x7ffff, 0x3f) + UHMUL(5, 0xffffff, 0xffffff, 0xffff) + HMUL(6, 0x80000000, -2, 1) + HMUL(7, 0x80000000, 2, -1) + HMUL(8, 0x80000001, 3, -2) + HMUL(9, 0x80000001, -3, 1) +#else + HMUL(4, 0x7ffffffff, 0x7ffffffff, 0x3f) + UHMUL(5, 0xffffffffff, 0xffffffffff, 0xffff) + HMUL(6, 0x8000000000000000, -2, 1) + HMUL(7, 0x8000000000000000, 2, -1) + HMUL(8, 0x8000000000000001, 3, -2) + HMUL(9, 0x8000000000000001, -3, 1) +#endif + prepare + pushargi ok + ellipsis + finishi @printf + ret + epilog diff --git a/check/lightning.c b/check/lightning.c index f04ecd8..42a6ed8 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -306,6 +306,8 @@ static void subxr(void); static void subxi(void); static void subcr(void); static void subci(void); static void rsbr(void); static void rsbi(void); static void mulr(void); 
static void muli(void); +static void hmulr(void); static void hmuli(void); +static void hmulr_u(void); static void hmuli_u(void); static void qmulr(void); static void qmuli(void); static void qmulr_u(void); static void qmuli_u(void); static void divr(void); static void divi(void); @@ -709,6 +711,8 @@ static instr_t instr_vector[] = { entry(subcr), entry(subci), entry(rsbr), entry(rsbi), entry(mulr), entry(muli), + entry(hmulr), entry(hmuli), + entry(hmulr_u), entry(hmuli_u), entry(qmulr), entry(qmuli), entry(qmulr_u), entry(qmuli_u), entry(divr), entry(divi), @@ -1723,6 +1727,8 @@ entry_ir_ir_ir(subxr) entry_ir_ir_im(subxi) entry_ir_ir_ir(subcr) entry_ir_ir_im(subci) entry_ir_ir_ir(rsbr) entry_ir_ir_im(rsbi) entry_ir_ir_ir(mulr) entry_ir_ir_im(muli) +entry_ir_ir_ir(hmulr) entry_ir_ir_im(hmuli) +entry_ir_ir_ir(hmulr_u) entry_ir_ir_im(hmuli_u) entry_ir_ir_ir_ir(qmulr) entry_ir_ir_ir_im(qmuli) entry_ir_ir_ir_ir(qmulr_u) entry_ir_ir_ir_im(qmuli_u) entry_ir_ir_ir(divr) entry_ir_ir_im(divi) diff --git a/doc/body.texi b/doc/body.texi index cb47139..f71b77c 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -247,6 +247,8 @@ rsbr _f _d O1 = O3 - O1 rsbi _f _d O1 = O3 - O1 mulr _f _d O1 = O2 * O3 muli _f _d O1 = O2 * O3 +hmulr _u O1 = ((O2 * O3) >> WORDSIZE) +hmuli _u O1 = ((O2 * O3) >> WORDSIZE) divr _u _f _d O1 = O2 / O3 divi _u _f _d O1 = O2 / O3 remr _u O1 = O2 % O3 diff --git a/include/lightning.h.in b/include/lightning.h.in index a4ef49f..6d51235 100644 --- a/include/lightning.h.in +++ b/include/lightning.h.in @@ -1203,6 +1203,13 @@ typedef enum { #define jit_fnmsi_d(u,v,w,x) _jit_fnmsi_d(_jit, u, v, w, x) jit_code_fnmsr_d, jit_code_fnmsi_d, +#define jit_hmulr(u,v,w) jit_new_node_www(jit_code_hmulr,u,v,w) +#define jit_hmuli(u,v,w) jit_new_node_www(jit_code_hmuli,u,v,w) + jit_code_hmulr, jit_code_hmuli, +#define jit_hmulr_u(u,v,w) jit_new_node_www(jit_code_hmulr_u,u,v,w) +#define jit_hmuli_u(u,v,w) jit_new_node_www(jit_code_hmuli_u,u,v,w) + jit_code_hmulr_u, 
jit_code_hmuli_u, + jit_code_last_code } jit_code_t; diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index d414cec..76a988b 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -555,6 +555,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) MUL(r0,r1,r2) # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr(r0,r1,r2) SMULH(r0,r1,r2) +# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr_u(r0,r1,r2) UMULH(r0,r1,r2) +# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3) static void _qmulr(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t); @@ -1229,6 +1235,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr_u(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void _qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c index 0a7bc2d..435bbe9 100644 --- a/lib/jit_aarch64-sz.c +++ b/lib/jit_aarch64-sz.c @@ -526,7 +526,12 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 16, /* hmuli */ + 4, /* hmulr_u */ + 16, /* hmuli_u */ # else /* PACKED_STACK */ + #define JIT_INSTR_MAX 120 0, /* data */ 0, /* live */ @@ -1052,5 +1057,9 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* 
hmulr */ + 16, /* hmuli */ + 4, /* hmulr_u */ + 16, /* hmuli_u */ # endif #endif /* __WORDSIZE */ diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index cd4e79c..bc78800 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -1426,6 +1426,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index 7791063..b977214 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -362,6 +362,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t); # define mulr(r0,r1,r2) MULQ(r1,r2,r0) # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2) +# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0) +# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2) +# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0) # define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3) static void _qmulr(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t); @@ -1082,14 +1086,14 @@ _qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t reg; /* The only invalid condition is r0 == r1 */ jit_int32_t t2, t3, s2, s3; - if (r2 == r0 || r2 == r1) { + if ((r0 != JIT_NOREG && r2 == r0) || r2 == r1) { s2 = jit_get_reg(jit_class_gpr); t2 = rn(s2); movr(t2, r2); } else t2 = r2; - if (r3 == r0 || r3 == r1) { + if ((r0 != JIT_NOREG && r3 == r0) || r3 == r1) { s3 = jit_get_reg(jit_class_gpr); t3 = rn(s3); movr(t3, r3); @@ -1129,16 +1133,20 @@ _qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { jit_int32_t reg; - if (r0 == r2 || r0 == r3) { - reg = jit_get_reg(jit_class_gpr); - mulr(rn(reg), r2, r3); + if (r0 != JIT_NOREG) { + if (r0 == r2 || r0 == r3) { + reg = jit_get_reg(jit_class_gpr); + 
mulr(rn(reg), r2, r3); + } + else + mulr(r0, r2, r3); } - else - mulr(r0, r2, r3); UMULH(r2, r3, r1); - if (r0 == r2 || r0 == r3) { - movr(r0, rn(reg)); - jit_unget_reg(reg); + if (r0 != JIT_NOREG) { + if (r0 == r2 || r0 == r3) { + movr(r0, rn(reg)); + jit_unget_reg(reg); + } } } @@ -1148,16 +1156,20 @@ _qmuli_u(jit_state_t *_jit, jit_int32_t r0, { jit_int32_t reg; if (_u8_p(i0)) { - if (r0 == r2) { - reg = jit_get_reg(jit_class_gpr); - muli(rn(reg), r2, i0); + if (r0 != JIT_NOREG) { + if (r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + muli(rn(reg), r2, i0); + } + else + muli(r0, r2, i0); } - else - muli(r0, r2, i0); UMULHi(r2, i0, r1); - if (r0 == r2) { - movr(r0, rn(reg)); - jit_unget_reg(reg); + if (r0 != JIT_NOREG) { + if (r0 == r2) { + movr(r0, rn(reg)); + jit_unget_reg(reg); + } } } else { diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c index b6ea741..fd39c0d 100644 --- a/lib/jit_alpha-sz.c +++ b/lib/jit_alpha-sz.c @@ -524,4 +524,8 @@ 0, /* fnmai_d */ 20, /* fnmsr_d */ 0, /* fnmsi_d */ + 36, /* hmulr */ + 60, /* hmuli */ + 4, /* hmulr_u */ + 28, /* hmuli_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index d2d378f..69bf397 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -1019,6 +1019,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index 9e19434..149db9a 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -984,6 +984,16 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr(r0,r1,r2) ihmulr(r0,r1,r2,1) +# define hmulr_u(r0,r1,r2) ihmulr(r0,r1,r2,0) +# define ihmulr(r0,r1,r2,cc) 
_ihmulr(_jit,r0,r1,r2,cc) +static void _ihmulr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_bool_t); +# define hmuli(r0,r1,i0) ihmuli(r0,r1,i0,1) +# define hmuli_u(r0,r1,i0) ihmuli(r0,r1,i0,0) +# define ihmuli(r0,r1,i0,cc) _ihmuli(_jit,r0,r1,i0,cc) +static void _ihmuli(jit_state_t*,jit_int32_t,jit_int32_t, + jit_word_t,jit_bool_t); # define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) # define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) # define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) @@ -2322,6 +2332,29 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_ihmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + iqmulr(rn(reg), r0, r1, r2, sign); + jit_unget_reg(reg); +} + +static void +_ihmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t t0, t1; + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + movi(rn(t1), i0); + iqmulr(rn(t0), r0, r1, rn(t1), sign); + jit_unget_reg(t1); + jit_unget_reg(t0); +} + +static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) { diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c index 31931e4..7ec5e9e 100644 --- a/lib/jit_arm-sz.c +++ b/lib/jit_arm-sz.c @@ -4,14 +4,14 @@ #define JIT_INSTR_MAX 144 0, /* data */ 0, /* live */ - 20, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ 4, /* skip */ 2, /* #name */ 0, /* #note */ 0, /* label */ - 26, /* prolog */ + 30, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ @@ -43,45 +43,45 @@ 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ - 16, /* va_arg_d */ + 28, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ - 12, /* addi */ + 12, /* addi */ 4, /* addcr */ 8, /* addci */ 4, /* addxr */ 4, /* addxi */ 4, /* subr */ - 12, /* subi */ + 12, /* subi */ 4, /* subcr */ 8, /* subci */ 4, /* 
subxr */ 4, /* subxi */ - 16, /* rsbi */ - 4, /* mulr */ - 12, /* muli */ + 16, /* rsbi */ + 8, /* mulr */ + 12, /* muli */ 4, /* qmulr */ - 12, /* qmuli */ + 12, /* qmuli */ 4, /* qmulr_u */ 8, /* qmuli_u */ - 32, /* divr */ - 36, /* divi */ - 24, /* divr_u */ - 28, /* divi_u */ - 18, /* qdivr */ - 22, /* qdivi */ - 18, /* qdivr_u */ - 22, /* qdivi_u */ - 24, /* remr */ - 32, /* remi */ - 24, /* remr_u */ - 28, /* remi_u */ + 32, /* divr */ + 36, /* divi */ + 24, /* divr_u */ + 28, /* divi_u */ + 18, /* qdivr */ + 22, /* qdivi */ + 18, /* qdivr_u */ + 22, /* qdivi_u */ + 24, /* remr */ + 32, /* remi */ + 24, /* remr_u */ + 28, /* remi_u */ 4, /* andr */ - 12, /* andi */ + 12, /* andi */ 4, /* orr */ - 12, /* ori */ + 12, /* ori */ 4, /* xorr */ - 12, /* xori */ + 12, /* xori */ 4, /* lshr */ 4, /* lshi */ 4, /* rshr */ @@ -92,98 +92,98 @@ 4, /* negi */ 4, /* comr */ 4, /* comi */ - 14, /* ltr */ - 14, /* lti */ - 14, /* ltr_u */ - 14, /* lti_u */ - 14, /* ler */ - 14, /* lei */ - 14, /* ler_u */ - 14, /* lei_u */ - 14, /* eqr */ - 14, /* eqi */ - 14, /* ger */ - 14, /* gei */ - 14, /* ger_u */ - 14, /* gei_u */ - 14, /* gtr */ - 14, /* gti */ - 14, /* gtr_u */ - 14, /* gti_u */ - 14, /* ner */ - 14, /* nei */ + 14, /* ltr */ + 14, /* lti */ + 14, /* ltr_u */ + 14, /* lti_u */ + 14, /* ler */ + 14, /* lei */ + 14, /* ler_u */ + 14, /* lei_u */ + 14, /* eqr */ + 14, /* eqi */ + 14, /* ger */ + 14, /* gei */ + 14, /* ger_u */ + 14, /* gei_u */ + 14, /* gtr */ + 14, /* gti */ + 14, /* gtr_u */ + 14, /* gti_u */ + 14, /* ner */ + 14, /* nei */ 4, /* movr */ 8, /* movi */ 8, /* movnr */ 8, /* movzr */ - 42, /* casr */ - 50, /* casi */ - 4, /* extr_c */ + 42, /* casr */ + 46, /* casi */ + 8, /* extr_c */ 4, /* exti_c */ 4, /* extr_uc */ 4, /* exti_uc */ - 4, /* extr_s */ + 8, /* extr_s */ 4, /* exti_s */ - 4, /* extr_us */ + 8, /* extr_us */ 4, /* exti_us */ 0, /* extr_i */ 0, /* exti_i */ 0, /* extr_ui */ 0, /* exti_ui */ - 8, /* bswapr_us */ + 20, /* bswapr_us */ 4, /* 
bswapi_us */ - 4, /* bswapr_ui */ + 16, /* bswapr_ui */ 8, /* bswapi_ui */ 0, /* bswapr_ul */ 0, /* bswapi_ul */ - 8, /* htonr_us */ + 20, /* htonr_us */ 4, /* htoni_us */ - 4, /* htonr_ui */ + 16, /* htonr_ui */ 8, /* htoni_ui */ 0, /* htonr_ul */ 0, /* htoni_ul */ 4, /* ldr_c */ - 12, /* ldi_c */ + 12, /* ldi_c */ 4, /* ldr_uc */ - 12, /* ldi_uc */ + 12, /* ldi_uc */ 4, /* ldr_s */ - 12, /* ldi_s */ + 12, /* ldi_s */ 4, /* ldr_us */ - 12, /* ldi_us */ + 12, /* ldi_us */ 4, /* ldr_i */ - 12, /* ldi_i */ + 12, /* ldi_i */ 0, /* ldr_ui */ 0, /* ldi_ui */ 0, /* ldr_l */ 0, /* ldi_l */ 4, /* ldxr_c */ - 12, /* ldxi_c */ + 12, /* ldxi_c */ 4, /* ldxr_uc */ - 12, /* ldxi_uc */ + 12, /* ldxi_uc */ 4, /* ldxr_s */ - 12, /* ldxi_s */ + 12, /* ldxi_s */ 4, /* ldxr_us */ - 12, /* ldxi_us */ + 12, /* ldxi_us */ 4, /* ldxr_i */ - 12, /* ldxi_i */ + 12, /* ldxi_i */ 0, /* ldxr_ui */ 0, /* ldxi_ui */ 0, /* ldxr_l */ 0, /* ldxi_l */ 4, /* str_c */ - 12, /* sti_c */ + 12, /* sti_c */ 4, /* str_s */ - 12, /* sti_s */ + 12, /* sti_s */ 4, /* str_i */ - 12, /* sti_i */ + 12, /* sti_i */ 0, /* str_l */ 0, /* sti_l */ 4, /* stxr_c */ - 12, /* stxi_c */ + 12, /* stxi_c */ 4, /* stxr_s */ - 12, /* stxi_s */ + 12, /* stxi_s */ 4, /* stxr_i */ - 12, /* stxi_i */ + 12, /* stxi_i */ 0, /* stxr_l */ 0, /* stxi_l */ 8, /* bltr */ @@ -195,7 +195,7 @@ 8, /* bler_u */ 8, /* blei_u */ 8, /* beqr */ - 16, /* beqi */ + 16, /* beqi */ 8, /* bger */ 8, /* bgei */ 8, /* bger_u */ @@ -205,7 +205,7 @@ 8, /* bgtr_u */ 8, /* bgti_u */ 8, /* bner */ - 16, /* bnei */ + 16, /* bnei */ 8, /* bmsr */ 8, /* bmsi */ 8, /* bmcr */ @@ -226,10 +226,10 @@ 8, /* bxsubi */ 8, /* bxsubr_u */ 8, /* bxsubi_u */ - 4, /* jmpr */ - 8, /* jmpi */ + 12, /* jmpr */ + 72, /* jmpi */ 4, /* callr */ - 20, /* calli */ + 20, /* calli */ 0, /* prepare */ 0, /* pushargr_c */ 0, /* pushargi_c */ @@ -269,96 +269,96 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 16, /* epilog */ + 276, /* epilog */ 0, /* arg_f */ 0, /* 
getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ - 4, /* addr_f */ - 8, /* addi_f */ - 4, /* subr_f */ - 8, /* subi_f */ - 8, /* rsbi_f */ - 4, /* mulr_f */ - 8, /* muli_f */ - 4, /* divr_f */ - 8, /* divi_f */ - 4, /* negr_f */ + 24, /* addr_f */ + 24, /* addi_f */ + 24, /* subr_f */ + 24, /* subi_f */ + 24, /* rsbi_f */ + 24, /* mulr_f */ + 24, /* muli_f */ + 24, /* divr_f */ + 24, /* divi_f */ + 12, /* negr_f */ 0, /* negi_f */ - 4, /* absr_f */ + 12, /* absr_f */ 0, /* absi_f */ - 4, /* sqrtr_f */ + 20, /* sqrtr_f */ 0, /* sqrti_f */ - 18, /* ltr_f */ - 30, /* lti_f */ - 20, /* ler_f */ - 32, /* lei_f */ - 18, /* eqr_f */ - 30, /* eqi_f */ - 18, /* ger_f */ - 30, /* gei_f */ - 18, /* gtr_f */ - 30, /* gti_f */ - 18, /* ner_f */ - 30, /* nei_f */ - 18, /* unltr_f */ - 30, /* unlti_f */ - 18, /* unler_f */ - 30, /* unlei_f */ - 24, /* uneqr_f */ - 36, /* uneqi_f */ - 18, /* unger_f */ - 30, /* ungei_f */ - 18, /* ungtr_f */ - 30, /* ungti_f */ - 24, /* ltgtr_f */ - 36, /* ltgti_f */ - 18, /* ordr_f */ - 30, /* ordi_f */ - 18, /* unordr_f */ - 30, /* unordi_f */ - 8, /* truncr_f_i */ + 24, /* ltr_f */ + 30, /* lti_f */ + 24, /* ler_f */ + 32, /* lei_f */ + 24, /* eqr_f */ + 30, /* eqi_f */ + 24, /* ger_f */ + 30, /* gei_f */ + 24, /* gtr_f */ + 30, /* gti_f */ + 28, /* ner_f */ + 32, /* nei_f */ + 56, /* unltr_f */ + 64, /* unlti_f */ + 56, /* unler_f */ + 64, /* unlei_f */ + 56, /* uneqr_f */ + 64, /* uneqi_f */ + 56, /* unger_f */ + 64, /* ungei_f */ + 56, /* ungtr_f */ + 64, /* ungti_f */ + 60, /* ltgtr_f */ + 68, /* ltgti_f */ + 28, /* ordr_f */ + 32, /* ordi_f */ + 56, /* unordr_f */ + 64, /* unordi_f */ + 20, /* truncr_f_i */ 0, /* truncr_f_l */ - 8, /* extr_f */ - 4, /* extr_d_f */ - 4, /* movr_f */ - 12, /* movi_f */ - 4, /* ldr_f */ - 12, /* ldi_f */ + 28, /* extr_f */ + 22, /* extr_d_f */ + 8, /* movr_f */ + 16, /* movi_f */ + 8, /* ldr_f */ + 16, /* ldi_f */ 8, /* ldxr_f */ - 16, /* ldxi_f */ - 4, /* str_f */ - 12, /* sti_f */ + 16, /* ldxi_f */ + 8, 
/* str_f */ + 16, /* sti_f */ 8, /* stxr_f */ - 16, /* stxi_f */ - 12, /* bltr_f */ - 24, /* blti_f */ - 12, /* bler_f */ - 24, /* blei_f */ - 12, /* beqr_f */ - 24, /* beqi_f */ - 12, /* bger_f */ - 24, /* bgei_f */ - 12, /* bgtr_f */ - 24, /* bgti_f */ - 12, /* bner_f */ - 24, /* bnei_f */ - 16, /* bunltr_f */ - 28, /* bunlti_f */ - 16, /* bunler_f */ - 28, /* bunlei_f */ - 20, /* buneqr_f */ - 32, /* buneqi_f */ - 16, /* bunger_f */ - 28, /* bungei_f */ - 12, /* bungtr_f */ - 24, /* bungti_f */ - 20, /* bltgtr_f */ - 32, /* bltgti_f */ - 12, /* bordr_f */ - 24, /* bordi_f */ - 12, /* bunordr_f */ - 24, /* bunordi_f */ + 16, /* stxi_f */ + 28, /* bltr_f */ + 32, /* blti_f */ + 28, /* bler_f */ + 32, /* blei_f */ + 28, /* beqr_f */ + 48, /* beqi_f */ + 28, /* bger_f */ + 32, /* bgei_f */ + 28, /* bgtr_f */ + 32, /* bgti_f */ + 28, /* bner_f */ + 32, /* bnei_f */ + 28, /* bunltr_f */ + 32, /* bunlti_f */ + 28, /* bunler_f */ + 32, /* bunlei_f */ + 60, /* buneqr_f */ + 68, /* buneqi_f */ + 28, /* bunger_f */ + 32, /* bungei_f */ + 28, /* bungtr_f */ + 32, /* bungti_f */ + 60, /* bltgtr_f */ + 68, /* bltgti_f */ + 28, /* bordr_f */ + 32, /* bordi_f */ + 28, /* bunordr_f */ + 32, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -368,91 +368,91 @@ 0, /* getarg_d */ 0, /* putargr_d */ 0, /* putargi_d */ - 4, /* addr_d */ - 20, /* addi_d */ - 4, /* subr_d */ - 20, /* subi_d */ - 20, /* rsbi_d */ - 4, /* mulr_d */ - 20, /* muli_d */ - 4, /* divr_d */ - 20, /* divi_d */ - 4, /* negr_d */ + 34, /* addr_d */ + 36, /* addi_d */ + 34, /* subr_d */ + 36, /* subi_d */ + 36, /* rsbi_d */ + 34, /* mulr_d */ + 36, /* muli_d */ + 34, /* divr_d */ + 36, /* divi_d */ + 20, /* negr_d */ 0, /* negi_d */ - 4, /* absr_d */ + 20, /* absr_d */ 0, /* absi_d */ - 4, /* sqrtr_d */ + 26, /* sqrtr_d */ 0, /* sqrti_d */ - 18, /* ltr_d */ - 34, /* lti_d */ - 20, /* ler_d */ - 36, /* lei_d */ - 18, /* eqr_d */ - 34, /* eqi_d */ - 18, /* ger_d */ - 34, /* gei_d */ - 18, /* 
gtr_d */ - 34, /* gti_d */ - 18, /* ner_d */ - 34, /* nei_d */ - 18, /* unltr_d */ - 34, /* unlti_d */ - 18, /* unler_d */ - 34, /* unlei_d */ - 24, /* uneqr_d */ - 40, /* uneqi_d */ - 18, /* unger_d */ - 34, /* ungei_d */ - 18, /* ungtr_d */ - 34, /* ungti_d */ - 24, /* ltgtr_d */ - 40, /* ltgti_d */ - 18, /* ordr_d */ - 34, /* ordi_d */ - 18, /* unordr_d */ - 34, /* unordi_d */ - 8, /* truncr_d_i */ + 28, /* ltr_d */ + 34, /* lti_d */ + 28, /* ler_d */ + 36, /* lei_d */ + 28, /* eqr_d */ + 34, /* eqi_d */ + 28, /* ger_d */ + 34, /* gei_d */ + 28, /* gtr_d */ + 34, /* gti_d */ + 32, /* ner_d */ + 36, /* nei_d */ + 66, /* unltr_d */ + 72, /* unlti_d */ + 66, /* unler_d */ + 72, /* unlei_d */ + 66, /* uneqr_d */ + 72, /* uneqi_d */ + 66, /* unger_d */ + 72, /* ungei_d */ + 66, /* ungtr_d */ + 72, /* ungti_d */ + 70, /* ltgtr_d */ + 76, /* ltgti_d */ + 32, /* ordr_d */ + 36, /* ordi_d */ + 66, /* unordr_d */ + 72, /* unordi_d */ + 20, /* truncr_d_i */ 0, /* truncr_d_l */ - 8, /* extr_d */ - 4, /* extr_f_d */ - 4, /* movr_d */ - 32, /* movi_d */ - 4, /* ldr_d */ - 12, /* ldi_d */ - 8, /* ldxr_d */ - 16, /* ldxi_d */ - 4, /* str_d */ - 12, /* sti_d */ - 8, /* stxr_d */ - 16, /* stxi_d */ - 12, /* bltr_d */ - 28, /* blti_d */ - 12, /* bler_d */ - 28, /* blei_d */ - 12, /* beqr_d */ - 36, /* beqi_d */ - 12, /* bger_d */ - 28, /* bgei_d */ - 12, /* bgtr_d */ - 28, /* bgti_d */ - 12, /* bner_d */ - 28, /* bnei_d */ - 16, /* bunltr_d */ - 32, /* bunlti_d */ - 16, /* bunler_d */ - 32, /* bunlei_d */ - 20, /* buneqr_d */ - 36, /* buneqi_d */ - 16, /* bunger_d */ - 32, /* bungei_d */ - 12, /* bungtr_d */ - 28, /* bungti_d */ - 20, /* bltgtr_d */ - 36, /* bltgti_d */ - 12, /* bordr_d */ - 28, /* bordi_d */ - 12, /* bunordr_d */ - 28, /* bunordi_d */ + 36, /* extr_d */ + 22, /* extr_f_d */ + 16, /* movr_d */ + 32, /* movi_d */ + 16, /* ldr_d */ + 24, /* ldi_d */ + 20, /* ldxr_d */ + 28, /* ldxi_d */ + 16, /* str_d */ + 24, /* sti_d */ + 20, /* stxr_d */ + 28, /* stxi_d */ + 32, 
/* bltr_d */ + 36, /* blti_d */ + 32, /* bler_d */ + 36, /* blei_d */ + 32, /* beqr_d */ + 52, /* beqi_d */ + 32, /* bger_d */ + 36, /* bgei_d */ + 32, /* bgtr_d */ + 36, /* bgti_d */ + 32, /* bner_d */ + 36, /* bnei_d */ + 32, /* bunltr_d */ + 36, /* bunlti_d */ + 32, /* bunler_d */ + 36, /* bunlei_d */ + 68, /* buneqr_d */ + 76, /* buneqi_d */ + 32, /* bunger_d */ + 36, /* bungei_d */ + 32, /* bungtr_d */ + 36, /* bungti_d */ + 68, /* bltgtr_d */ + 76, /* bltgti_d */ + 32, /* bordr_d */ + 36, /* bordi_d */ + 32, /* bunordr_d */ + 36, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -460,72 +460,76 @@ 0, /* retval_d */ 4, /* movr_w_f */ 8, /* movi_w_f */ - 4, /* movr_ww_d */ - 16, /* movi_ww_d */ + 8, /* movr_ww_d */ + 20, /* movi_ww_d */ 0, /* movr_w_d */ 0, /* movi_w_d */ 4, /* movr_f_w */ - 4, /* movi_f_w */ - 4, /* movr_d_ww */ - 12, /* movi_d_ww */ + 8, /* movi_f_w */ + 8, /* movr_d_ww */ + 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ 8, /* clor */ 4, /* cloi */ 4, /* clzr */ 4, /* clzi */ - 12, /* ctor */ + 12, /* ctor */ 4, /* ctoi */ 8, /* ctzr */ 4, /* ctzi */ 4, /* rbitr */ 8, /* rbiti */ - 40, /* popcntr */ + 40, /* popcntr */ 4, /* popcnti */ - 12, /* lrotr */ + 12, /* lrotr */ 4, /* lroti */ 4, /* rrotr */ 4, /* rroti */ - 4, /* extr */ + 8, /* extr */ 4, /* exti */ - 4, /* extr_u */ + 12, /* extr_u */ 4, /* exti_u */ - 4, /* depr */ - 8, /* depi */ - 50, /* qlshr */ + 24, /* depr */ + 20, /* depi */ + 50, /* qlshr */ 8, /* qlshi */ - 50, /* qlshr_u */ + 50, /* qlshr_u */ 8, /* qlshi_u */ - 50, /* qrshr */ + 50, /* qrshr */ 8, /* qrshi */ - 50, /* qrshr_u */ + 50, /* qrshr_u */ 8, /* qrshi_u */ - 72, /* unldr */ - 44, /* unldi */ - 72, /* unldr_u */ - 44, /* unldi_u */ - 68, /* unstr */ - 44, /* unsti */ - 140, /* unldr_x */ - 76, /* unldi_x */ - 144, /* unstr_x */ - 92, /* unsti_x */ - 8, /* fmar_f */ + 72, /* unldr */ + 44, /* unldi */ + 72, /* unldr_u */ + 44, /* unldi_u */ + 68, /* unstr */ + 44, /* unsti */ + 
144, /* unldr_x */ + 80, /* unldi_x */ + 148, /* unstr_x */ + 96, /* unsti_x */ + 48, /* fmar_f */ 0, /* fmai_f */ - 8, /* fmsr_f */ + 48, /* fmsr_f */ 0, /* fmsi_f */ - 8, /* fmar_d */ + 68, /* fmar_d */ 0, /* fmai_d */ - 8, /* fmsr_d */ + 68, /* fmsr_d */ 0, /* fmsi_d */ - 12, /* fnmar_f */ + 60, /* fnmar_f */ 0, /* fnmai_f */ - 12, /* fnmsr_f */ + 60, /* fnmsr_f */ 0, /* fnmsi_f */ - 12, /* fnmar_d */ + 88, /* fnmar_d */ 0, /* fnmai_d */ - 12, /* fnmsr_d */ + 88, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 12, /* hmuli */ + 4, /* hmulr_u */ + 8, /* hmuli_u */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ @@ -534,14 +538,14 @@ #define JIT_INSTR_MAX 144 0, /* data */ 0, /* live */ - 20, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ 4, /* skip */ 2, /* #name */ 0, /* #note */ 0, /* label */ - 30, /* prolog */ + 26, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ @@ -573,45 +577,45 @@ 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ - 28, /* va_arg_d */ + 16, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ - 12, /* addi */ + 12, /* addi */ 4, /* addcr */ 8, /* addci */ 4, /* addxr */ 4, /* addxi */ 4, /* subr */ - 12, /* subi */ + 12, /* subi */ 4, /* subcr */ 8, /* subci */ 4, /* subxr */ 4, /* subxi */ - 16, /* rsbi */ - 8, /* mulr */ - 12, /* muli */ + 16, /* rsbi */ + 4, /* mulr */ + 12, /* muli */ 4, /* qmulr */ - 12, /* qmuli */ + 12, /* qmuli */ 4, /* qmulr_u */ 8, /* qmuli_u */ - 32, /* divr */ - 36, /* divi */ - 24, /* divr_u */ - 28, /* divi_u */ - 18, /* qdivr */ - 22, /* qdivi */ - 18, /* qdivr_u */ - 22, /* qdivi_u */ - 24, /* remr */ - 32, /* remi */ - 24, /* remr_u */ - 28, /* remi_u */ + 32, /* divr */ + 36, /* divi */ + 24, /* divr_u */ + 28, /* divi_u */ + 18, /* qdivr */ + 22, /* qdivi */ + 18, /* qdivr_u */ + 22, /* qdivi_u */ + 24, /* remr */ + 32, /* remi */ + 24, /* remr_u */ + 28, /* remi_u */ 4, /* andr */ - 12, /* andi */ + 12, /* andi */ 4, /* orr */ - 12, /* ori */ + 12, /* ori */ 4, /* xorr */ - 
12, /* xori */ + 12, /* xori */ 4, /* lshr */ 4, /* lshi */ 4, /* rshr */ @@ -622,98 +626,98 @@ 4, /* negi */ 4, /* comr */ 4, /* comi */ - 14, /* ltr */ - 14, /* lti */ - 14, /* ltr_u */ - 14, /* lti_u */ - 14, /* ler */ - 14, /* lei */ - 14, /* ler_u */ - 14, /* lei_u */ - 14, /* eqr */ - 14, /* eqi */ - 14, /* ger */ - 14, /* gei */ - 14, /* ger_u */ - 14, /* gei_u */ - 14, /* gtr */ - 14, /* gti */ - 14, /* gtr_u */ - 14, /* gti_u */ - 14, /* ner */ - 14, /* nei */ + 14, /* ltr */ + 14, /* lti */ + 14, /* ltr_u */ + 14, /* lti_u */ + 14, /* ler */ + 14, /* lei */ + 14, /* ler_u */ + 14, /* lei_u */ + 14, /* eqr */ + 14, /* eqi */ + 14, /* ger */ + 14, /* gei */ + 14, /* ger_u */ + 14, /* gei_u */ + 14, /* gtr */ + 14, /* gti */ + 14, /* gtr_u */ + 14, /* gti_u */ + 14, /* ner */ + 14, /* nei */ 4, /* movr */ 8, /* movi */ 8, /* movnr */ 8, /* movzr */ - 42, /* casr */ - 46, /* casi */ - 8, /* extr_c */ + 42, /* casr */ + 50, /* casi */ + 4, /* extr_c */ 4, /* exti_c */ 4, /* extr_uc */ 4, /* exti_uc */ - 8, /* extr_s */ + 4, /* extr_s */ 4, /* exti_s */ - 8, /* extr_us */ + 4, /* extr_us */ 4, /* exti_us */ 0, /* extr_i */ 0, /* exti_i */ 0, /* extr_ui */ 0, /* exti_ui */ - 20, /* bswapr_us */ + 8, /* bswapr_us */ 4, /* bswapi_us */ - 16, /* bswapr_ui */ + 4, /* bswapr_ui */ 8, /* bswapi_ui */ 0, /* bswapr_ul */ 0, /* bswapi_ul */ - 20, /* htonr_us */ + 8, /* htonr_us */ 4, /* htoni_us */ - 16, /* htonr_ui */ + 4, /* htonr_ui */ 8, /* htoni_ui */ 0, /* htonr_ul */ 0, /* htoni_ul */ 4, /* ldr_c */ - 12, /* ldi_c */ + 12, /* ldi_c */ 4, /* ldr_uc */ - 12, /* ldi_uc */ + 12, /* ldi_uc */ 4, /* ldr_s */ - 12, /* ldi_s */ + 12, /* ldi_s */ 4, /* ldr_us */ - 12, /* ldi_us */ + 12, /* ldi_us */ 4, /* ldr_i */ - 12, /* ldi_i */ + 12, /* ldi_i */ 0, /* ldr_ui */ 0, /* ldi_ui */ 0, /* ldr_l */ 0, /* ldi_l */ 4, /* ldxr_c */ - 12, /* ldxi_c */ + 12, /* ldxi_c */ 4, /* ldxr_uc */ - 12, /* ldxi_uc */ + 12, /* ldxi_uc */ 4, /* ldxr_s */ - 12, /* ldxi_s */ + 12, /* ldxi_s */ 
4, /* ldxr_us */ - 12, /* ldxi_us */ + 12, /* ldxi_us */ 4, /* ldxr_i */ - 12, /* ldxi_i */ + 12, /* ldxi_i */ 0, /* ldxr_ui */ 0, /* ldxi_ui */ 0, /* ldxr_l */ 0, /* ldxi_l */ 4, /* str_c */ - 12, /* sti_c */ + 12, /* sti_c */ 4, /* str_s */ - 12, /* sti_s */ + 12, /* sti_s */ 4, /* str_i */ - 12, /* sti_i */ + 12, /* sti_i */ 0, /* str_l */ 0, /* sti_l */ 4, /* stxr_c */ - 12, /* stxi_c */ + 12, /* stxi_c */ 4, /* stxr_s */ - 12, /* stxi_s */ + 12, /* stxi_s */ 4, /* stxr_i */ - 12, /* stxi_i */ + 12, /* stxi_i */ 0, /* stxr_l */ 0, /* stxi_l */ 8, /* bltr */ @@ -725,7 +729,7 @@ 8, /* bler_u */ 8, /* blei_u */ 8, /* beqr */ - 16, /* beqi */ + 16, /* beqi */ 8, /* bger */ 8, /* bgei */ 8, /* bger_u */ @@ -735,7 +739,7 @@ 8, /* bgtr_u */ 8, /* bgti_u */ 8, /* bner */ - 16, /* bnei */ + 16, /* bnei */ 8, /* bmsr */ 8, /* bmsi */ 8, /* bmcr */ @@ -756,10 +760,10 @@ 8, /* bxsubi */ 8, /* bxsubr_u */ 8, /* bxsubi_u */ - 12, /* jmpr */ - 72, /* jmpi */ + 4, /* jmpr */ + 8, /* jmpi */ 4, /* callr */ - 20, /* calli */ + 20, /* calli */ 0, /* prepare */ 0, /* pushargr_c */ 0, /* pushargi_c */ @@ -804,91 +808,91 @@ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ - 24, /* addr_f */ - 24, /* addi_f */ - 24, /* subr_f */ - 24, /* subi_f */ - 24, /* rsbi_f */ - 24, /* mulr_f */ - 24, /* muli_f */ - 24, /* divr_f */ - 24, /* divi_f */ - 12, /* negr_f */ + 4, /* addr_f */ + 8, /* addi_f */ + 4, /* subr_f */ + 8, /* subi_f */ + 8, /* rsbi_f */ + 4, /* mulr_f */ + 8, /* muli_f */ + 4, /* divr_f */ + 8, /* divi_f */ + 4, /* negr_f */ 0, /* negi_f */ - 12, /* absr_f */ + 4, /* absr_f */ 0, /* absi_f */ - 20, /* sqrtr_f */ + 4, /* sqrtr_f */ 0, /* sqrti_f */ - 24, /* ltr_f */ - 30, /* lti_f */ - 24, /* ler_f */ - 32, /* lei_f */ - 24, /* eqr_f */ - 30, /* eqi_f */ - 24, /* ger_f */ - 30, /* gei_f */ - 24, /* gtr_f */ - 30, /* gti_f */ - 28, /* ner_f */ - 32, /* nei_f */ - 56, /* unltr_f */ - 64, /* unlti_f */ - 56, /* unler_f */ - 64, /* unlei_f */ - 56, /* uneqr_f */ - 64, /* 
uneqi_f */ - 56, /* unger_f */ - 64, /* ungei_f */ - 56, /* ungtr_f */ - 64, /* ungti_f */ - 60, /* ltgtr_f */ - 68, /* ltgti_f */ - 28, /* ordr_f */ - 32, /* ordi_f */ - 56, /* unordr_f */ - 64, /* unordi_f */ - 20, /* truncr_f_i */ + 18, /* ltr_f */ + 30, /* lti_f */ + 20, /* ler_f */ + 32, /* lei_f */ + 18, /* eqr_f */ + 30, /* eqi_f */ + 18, /* ger_f */ + 30, /* gei_f */ + 18, /* gtr_f */ + 30, /* gti_f */ + 18, /* ner_f */ + 30, /* nei_f */ + 18, /* unltr_f */ + 30, /* unlti_f */ + 18, /* unler_f */ + 30, /* unlei_f */ + 24, /* uneqr_f */ + 36, /* uneqi_f */ + 18, /* unger_f */ + 30, /* ungei_f */ + 18, /* ungtr_f */ + 30, /* ungti_f */ + 24, /* ltgtr_f */ + 36, /* ltgti_f */ + 18, /* ordr_f */ + 30, /* ordi_f */ + 18, /* unordr_f */ + 30, /* unordi_f */ + 8, /* truncr_f_i */ 0, /* truncr_f_l */ - 28, /* extr_f */ - 22, /* extr_d_f */ - 8, /* movr_f */ - 16, /* movi_f */ - 8, /* ldr_f */ - 16, /* ldi_f */ + 8, /* extr_f */ + 4, /* extr_d_f */ + 4, /* movr_f */ + 12, /* movi_f */ + 4, /* ldr_f */ + 12, /* ldi_f */ 8, /* ldxr_f */ - 16, /* ldxi_f */ - 8, /* str_f */ - 16, /* sti_f */ + 16, /* ldxi_f */ + 4, /* str_f */ + 12, /* sti_f */ 8, /* stxr_f */ - 16, /* stxi_f */ - 28, /* bltr_f */ - 32, /* blti_f */ - 28, /* bler_f */ - 32, /* blei_f */ - 28, /* beqr_f */ - 48, /* beqi_f */ - 28, /* bger_f */ - 32, /* bgei_f */ - 28, /* bgtr_f */ - 32, /* bgti_f */ - 28, /* bner_f */ - 32, /* bnei_f */ - 28, /* bunltr_f */ - 32, /* bunlti_f */ - 28, /* bunler_f */ - 32, /* bunlei_f */ - 60, /* buneqr_f */ - 68, /* buneqi_f */ - 28, /* bunger_f */ - 32, /* bungei_f */ - 28, /* bungtr_f */ - 32, /* bungti_f */ - 60, /* bltgtr_f */ - 68, /* bltgti_f */ - 28, /* bordr_f */ - 32, /* bordi_f */ - 28, /* bunordr_f */ - 32, /* bunordi_f */ + 16, /* stxi_f */ + 12, /* bltr_f */ + 24, /* blti_f */ + 12, /* bler_f */ + 24, /* blei_f */ + 12, /* beqr_f */ + 24, /* beqi_f */ + 12, /* bger_f */ + 24, /* bgei_f */ + 12, /* bgtr_f */ + 24, /* bgti_f */ + 12, /* bner_f */ + 24, /* 
bnei_f */ + 16, /* bunltr_f */ + 28, /* bunlti_f */ + 16, /* bunler_f */ + 28, /* bunlei_f */ + 20, /* buneqr_f */ + 32, /* buneqi_f */ + 16, /* bunger_f */ + 28, /* bungei_f */ + 12, /* bungtr_f */ + 24, /* bungti_f */ + 20, /* bltgtr_f */ + 32, /* bltgti_f */ + 12, /* bordr_f */ + 24, /* bordi_f */ + 12, /* bunordr_f */ + 24, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -898,91 +902,91 @@ 0, /* getarg_d */ 0, /* putargr_d */ 0, /* putargi_d */ - 34, /* addr_d */ - 36, /* addi_d */ - 34, /* subr_d */ - 36, /* subi_d */ - 36, /* rsbi_d */ - 34, /* mulr_d */ - 36, /* muli_d */ - 34, /* divr_d */ - 36, /* divi_d */ - 20, /* negr_d */ + 4, /* addr_d */ + 20, /* addi_d */ + 4, /* subr_d */ + 20, /* subi_d */ + 20, /* rsbi_d */ + 4, /* mulr_d */ + 20, /* muli_d */ + 4, /* divr_d */ + 20, /* divi_d */ + 4, /* negr_d */ 0, /* negi_d */ - 20, /* absr_d */ + 4, /* absr_d */ 0, /* absi_d */ - 26, /* sqrtr_d */ + 4, /* sqrtr_d */ 0, /* sqrti_d */ - 28, /* ltr_d */ - 34, /* lti_d */ - 28, /* ler_d */ - 36, /* lei_d */ - 28, /* eqr_d */ - 34, /* eqi_d */ - 28, /* ger_d */ - 34, /* gei_d */ - 28, /* gtr_d */ - 34, /* gti_d */ - 32, /* ner_d */ - 36, /* nei_d */ - 66, /* unltr_d */ - 72, /* unlti_d */ - 66, /* unler_d */ - 72, /* unlei_d */ - 66, /* uneqr_d */ - 72, /* uneqi_d */ - 66, /* unger_d */ - 72, /* ungei_d */ - 66, /* ungtr_d */ - 72, /* ungti_d */ - 70, /* ltgtr_d */ - 76, /* ltgti_d */ - 32, /* ordr_d */ - 36, /* ordi_d */ - 66, /* unordr_d */ - 72, /* unordi_d */ - 20, /* truncr_d_i */ + 18, /* ltr_d */ + 34, /* lti_d */ + 20, /* ler_d */ + 36, /* lei_d */ + 18, /* eqr_d */ + 34, /* eqi_d */ + 18, /* ger_d */ + 34, /* gei_d */ + 18, /* gtr_d */ + 34, /* gti_d */ + 18, /* ner_d */ + 34, /* nei_d */ + 18, /* unltr_d */ + 34, /* unlti_d */ + 18, /* unler_d */ + 34, /* unlei_d */ + 24, /* uneqr_d */ + 40, /* uneqi_d */ + 18, /* unger_d */ + 34, /* ungei_d */ + 18, /* ungtr_d */ + 34, /* ungti_d */ + 24, /* ltgtr_d */ + 40, /* ltgti_d */ + 
18, /* ordr_d */ + 34, /* ordi_d */ + 18, /* unordr_d */ + 34, /* unordi_d */ + 8, /* truncr_d_i */ 0, /* truncr_d_l */ - 36, /* extr_d */ - 22, /* extr_f_d */ - 16, /* movr_d */ - 32, /* movi_d */ - 16, /* ldr_d */ - 24, /* ldi_d */ - 20, /* ldxr_d */ - 28, /* ldxi_d */ - 16, /* str_d */ - 24, /* sti_d */ - 20, /* stxr_d */ - 28, /* stxi_d */ - 32, /* bltr_d */ - 36, /* blti_d */ - 32, /* bler_d */ - 36, /* blei_d */ - 32, /* beqr_d */ - 52, /* beqi_d */ - 32, /* bger_d */ - 36, /* bgei_d */ - 32, /* bgtr_d */ - 36, /* bgti_d */ - 32, /* bner_d */ - 36, /* bnei_d */ - 32, /* bunltr_d */ - 36, /* bunlti_d */ - 32, /* bunler_d */ - 36, /* bunlei_d */ - 68, /* buneqr_d */ - 76, /* buneqi_d */ - 32, /* bunger_d */ - 36, /* bungei_d */ - 32, /* bungtr_d */ - 36, /* bungti_d */ - 68, /* bltgtr_d */ - 76, /* bltgti_d */ - 32, /* bordr_d */ - 36, /* bordi_d */ - 32, /* bunordr_d */ - 36, /* bunordi_d */ + 8, /* extr_d */ + 4, /* extr_f_d */ + 4, /* movr_d */ + 32, /* movi_d */ + 4, /* ldr_d */ + 12, /* ldi_d */ + 8, /* ldxr_d */ + 16, /* ldxi_d */ + 4, /* str_d */ + 12, /* sti_d */ + 8, /* stxr_d */ + 16, /* stxi_d */ + 12, /* bltr_d */ + 28, /* blti_d */ + 12, /* bler_d */ + 28, /* blei_d */ + 12, /* beqr_d */ + 36, /* beqi_d */ + 12, /* bger_d */ + 28, /* bgei_d */ + 12, /* bgtr_d */ + 28, /* bgti_d */ + 12, /* bner_d */ + 28, /* bnei_d */ + 16, /* bunltr_d */ + 32, /* bunlti_d */ + 16, /* bunler_d */ + 32, /* bunlei_d */ + 20, /* buneqr_d */ + 36, /* buneqi_d */ + 16, /* bunger_d */ + 32, /* bungei_d */ + 12, /* bungtr_d */ + 28, /* bungti_d */ + 20, /* bltgtr_d */ + 36, /* bltgti_d */ + 12, /* bordr_d */ + 28, /* bordi_d */ + 12, /* bunordr_d */ + 28, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -990,71 +994,75 @@ 0, /* retval_d */ 4, /* movr_w_f */ 8, /* movi_w_f */ - 8, /* movr_ww_d */ - 20, /* movi_ww_d */ + 4, /* movr_ww_d */ + 16, /* movi_ww_d */ 0, /* movr_w_d */ 0, /* movi_w_d */ 4, /* movr_f_w */ - 8, /* movi_f_w */ - 8, /* 
movr_d_ww */ - 12, /* movi_d_ww */ + 4, /* movi_f_w */ + 4, /* movr_d_ww */ + 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ 8, /* clor */ 4, /* cloi */ 4, /* clzr */ 4, /* clzi */ - 12, /* ctor */ + 12, /* ctor */ 4, /* ctoi */ 8, /* ctzr */ 4, /* ctzi */ 4, /* rbitr */ 8, /* rbiti */ - 40, /* popcntr */ + 40, /* popcntr */ 4, /* popcnti */ - 12, /* lrotr */ + 12, /* lrotr */ 4, /* lroti */ 4, /* rrotr */ 4, /* rroti */ - 8, /* extr */ + 4, /* extr */ 4, /* exti */ - 12, /* extr_u */ + 4, /* extr_u */ 4, /* exti_u */ - 24, /* depr */ - 20, /* depi */ - 50, /* qlshr */ + 4, /* depr */ + 8, /* depi */ + 50, /* qlshr */ 8, /* qlshi */ - 50, /* qlshr_u */ + 50, /* qlshr_u */ 8, /* qlshi_u */ - 50, /* qrshr */ + 50, /* qrshr */ 8, /* qrshi */ - 50, /* qrshr_u */ + 50, /* qrshr_u */ 8, /* qrshi_u */ - 72, /* unldr */ - 44, /* unldi */ - 72, /* unldr_u */ - 44, /* unldi_u */ - 68, /* unstr */ - 44, /* unsti */ + 72, /* unldr */ + 44, /* unldi */ + 72, /* unldr_u */ + 44, /* unldi_u */ + 68, /* unstr */ + 44, /* unsti */ 140, /* unldr_x */ - 76, /* unldi_x */ + 76, /* unldi_x */ 144, /* unstr_x */ - 96, /* unsti_x */ - 48, /* fmar_f */ + 92, /* unsti_x */ + 8, /* fmar_f */ 0, /* fmai_f */ - 48, /* fmsr_f */ + 8, /* fmsr_f */ 0, /* fmsi_f */ - 68, /* fmar_d */ + 8, /* fmar_d */ 0, /* fmai_d */ - 68, /* fmsr_d */ + 8, /* fmsr_d */ 0, /* fmsi_d */ - 60, /* fnmar_f */ + 12, /* fnmar_f */ 0, /* fnmai_f */ - 60, /* fnmsr_f */ + 12, /* fnmsr_f */ 0, /* fnmsi_f */ - 88, /* fnmar_d */ + 12, /* fnmar_d */ 0, /* fnmai_d */ - 88, /* fnmsr_d */ + 12, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 12, /* hmuli */ + 4, /* hmulr_u */ + 8, /* hmuli_u */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/lib/jit_arm.c b/lib/jit_arm.c index df6c0e7..25aa7cb 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1573,6 +1573,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + 
case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index bb6e540..4db79d8 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -707,6 +707,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2) +#define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0) +#define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2) +#define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0) static long long __llmul(int, int); #define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3) static void _qmulr(jit_state_t*, @@ -1939,7 +1943,8 @@ _qmulr(jit_state_t *_jit, movr(_R26_REGNO, r2); movr(_R25_REGNO, r3); calli((jit_word_t)__llmul); - movr(r0, _R29_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _R29_REGNO); movr(r1, _R28_REGNO); } @@ -1950,7 +1955,8 @@ _qmuli(jit_state_t *_jit, movr(_R26_REGNO, r2); movi(_R25_REGNO, i0); calli((jit_word_t)__llmul); - movr(r0, _R29_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _R29_REGNO); movr(r1, _R28_REGNO); } @@ -1967,7 +1973,8 @@ _qmulr_u(jit_state_t *_jit, ldxi_f(rn(t1), _FP_REGNO, alloca_offset - 8); XMPYU(rn(t0), rn(t1), rn(t0)); stxi_d(alloca_offset - 8, _FP_REGNO, rn(t0)); - ldxi(r0, _FP_REGNO, alloca_offset - 4); + if (r0 != JIT_NOREG) + ldxi(r0, _FP_REGNO, alloca_offset - 4); ldxi(r1, _FP_REGNO, alloca_offset - 8); jit_unget_reg(t1); jit_unget_reg(t0); diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c index c460264..7a23ebd 100644 --- a/lib/jit_hppa-sz.c +++ b/lib/jit_hppa-sz.c @@ -524,4 +524,8 @@ 0, /* fnmai_d */ 12, /* fnmsr_d */ 0, /* fnmsi_d */ + 36, /* hmulr */ + 40, /* hmuli */ + 48, /* hmulr_u */ + 56, /* hmuli_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index b6b1f59..6330bf6 100644 
--- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -1024,6 +1024,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 4460940..a4ec58d 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -1206,6 +1206,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define hmulr(r0,r1,r2) mulh(r0,r1,r2,1) +#define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define hmulr_u(r0,r1,r2) mulh(r0,r1,r2,0) +#define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define divr(r0,r1,r2) _divr(_jit,r0,r1,r2) static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define divi(r0,r1,i0) _divi(_jit,r0,r1,i0) @@ -3890,6 +3896,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr_u(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + #if !defined(__GNUC__) static long __divdi3(long u, long v) diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c index 2a8feaf..9033334 100644 --- a/lib/jit_ia64-sz.c +++ b/lib/jit_ia64-sz.c @@ -524,4 +524,8 @@ 0, /* fnmai_d */ 16, /* fnmsr_d */ 0, /* fnmsi_d */ + 32, /* 
hmulr */ + 32, /* hmuli */ + 32, /* hmulr_u */ + 32, /* hmuli_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index d385e8d..f689231 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -1171,6 +1171,10 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_loongarch-cpu.c b/lib/jit_loongarch-cpu.c index 46e8ce7..22ca0f0 100644 --- a/lib/jit_loongarch-cpu.c +++ b/lib/jit_loongarch-cpu.c @@ -386,6 +386,12 @@ static void _rsbi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define mulr(r0, r1, r2) MUL_D(r0, r1, r2) # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define hmulr(r0, r1, r2) MULH_D(r0, r1, r2) +# define hmuli(r0, r1, i0) _hmuli(_jit, r0, r1, i0) +static void _hmuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define hmulr_u(r0, r1, r2) MULH_DU(r0, r1, r2) +# define hmuli_u(r0, r1, i0) _hmuli_u(_jit, r0, r1, i0) +static void _hmuli_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define qmulr(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 1) # define qmulr_u(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 0) # define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign) @@ -1244,6 +1250,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr_u(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, 
jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) { diff --git a/lib/jit_loongarch-sz.c b/lib/jit_loongarch-sz.c index 377dbc0..18e73aa 100644 --- a/lib/jit_loongarch-sz.c +++ b/lib/jit_loongarch-sz.c @@ -524,4 +524,8 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 20, /* hmuli */ + 4, /* hmulr_u */ + 20, /* hmuli_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_loongarch.c b/lib/jit_loongarch.c index b892563..cd38c4e 100644 --- a/lib/jit_loongarch.c +++ b/lib/jit_loongarch.c @@ -1098,6 +1098,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 7a3b600..37031c4 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -588,6 +588,14 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr(r0,r1,r2) _hmulr(_jit,r0,r1,r2) +static void _hmulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr_u(r0,r1,r2) _hmulr_u(_jit,r0,r1,r2) +static void _hmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) # define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) # define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) @@ -2085,6 +2093,48 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_hmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) 
+{ + if (jit_mips6_p()) + muh_r6(r0, r1, r2); + else { + mult(r1, r2); + MFHI(r0); + } +} + +static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_hmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_mips6_p()) + muhu_r6(r0, r1, r2); + else { + multu(r1, r2); + MFHI(r0); + } +} + +static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr_u(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) { diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index 8c5cc52..156fc95 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -525,6 +525,10 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 8, /* hmulr */ + 16, /* hmuli */ + 8, /* hmulr_u */ + 16, /* hmuli_u */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -1053,4 +1057,8 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 8, /* hmulr */ + 28, /* hmuli */ + 8, /* hmulr_u */ + 28, /* hmuli_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_mips.c b/lib/jit_mips.c index 50cccf1..1fec109 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -1551,6 +1551,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_names.c b/lib/jit_names.c index 52f1eef..88bc717 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -289,4 +289,6 @@ static char *code_name[] = { "fnmsr_f", "fnmsi_f", "fnmar_d", "fnmai_d", "fnmsr_d", "fnmsi_d", + "hmulr", "hmuli", + "hmulr_u", "hmuli_u", }; diff --git 
a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 5bd1aa3..f84998b 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -600,16 +600,20 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # if __WORDSIZE == 32 # define mulr(r0,r1,r2) MULLW(r0,r1,r2) # define mullr(r0,r1,r2) MULLW(r0,r1,r2) -# define mulhr(r0,r1,r2) MULHW(r0,r1,r2) -# define mulhr_u(r0,r1,r2) MULHWU(r0,r1,r2) +# define hmulr(r0,r1,r2) MULHW(r0,r1,r2) +# define hmulr_u(r0,r1,r2) MULHWU(r0,r1,r2) # else # define mulr(r0,r1,r2) MULLD(r0,r1,r2) # define mullr(r0,r1,r2) MULLD(r0,r1,r2) -# define mulhr(r0,r1,r2) MULHD(r0,r1,r2) -# define mulhr_u(r0,r1,r2) MULHDU(r0,r1,r2) +# define hmulr(r0,r1,r2) MULHD(r0,r1,r2) +# define hmulr_u(r0,r1,r2) MULHDU(r0,r1,r2) # endif # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) # define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) # define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) @@ -1535,6 +1539,9 @@ static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; + /* NOTE verified and overflow is correctly computed. + * No need to check for __WORDSIZE == 32. + * Documented as a 32 bit instruction. 
*/ if (can_sign_extend_short_p(i0)) MULLI(r0, r1, i0); else { @@ -1546,6 +1553,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr_u(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) { @@ -1557,9 +1584,9 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, else mullr(r0, r2, r3); if (sign) - mulhr(r1, r2, r3); + hmulr(r1, r2, r3); else - mulhr_u(r1, r2, r3); + hmulr_u(r1, r2, r3); if (r0 == r2 || r0 == r3) { movr(r0, rn(reg)); jit_unget_reg(reg); diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index 2015464..136f1d4 100644 --- a/lib/jit_ppc-sz.c +++ b/lib/jit_ppc-sz.c @@ -527,6 +527,10 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 12, /* hmuli */ + 4, /* hmulr_u */ + 12, /* hmuli_u */ #endif /* !_CALL_SYSV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -1061,6 +1065,10 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 12, /* hmuli */ + 4, /* hmulr_u */ + 12, /* hmuli_u */ #endif /* _CALL_SYSV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -1594,6 +1602,10 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 24, /* hmuli */ + 4, /* hmulr_u */ + 24, /* hmuli_u */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -2126,6 +2138,10 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 24, /* hmuli */ + 4, /* hmulr_u */ + 24, /* hmuli_u */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* 
__WORDSIZE */ diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index bddd523..9f98176 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -1377,6 +1377,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_riscv-cpu.c b/lib/jit_riscv-cpu.c index 04174ae..8d6115a 100644 --- a/lib/jit_riscv-cpu.c +++ b/lib/jit_riscv-cpu.c @@ -302,6 +302,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0, r1, r2) MUL(r0, r1, r2) # define muli(r0, r1, im) _muli(_jit, r0, r1, im) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr(r0, r1, r2) MULH(r0, r1, r2) +# define hmuli(r0, r1, im) _hmuli(_jit, r0, r1, im) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr_u(r0, r1, r2) MULHU(r0, r1, r2) +# define hmuli_u(r0, r1, im) _hmuli_u(_jit, r0, r1, im) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define divr(r0, r1, r2) DIV(r0, r1, r2) # define divi(r0, r1, im) _divi(_jit, r0, r1, im) static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -939,6 +945,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + hmulr(r0, r1, rn(t0)); + jit_unget_reg(t0); +} + +static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + t0 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + hmulr_u(r0, r1, rn(t0)); + jit_unget_reg(t0); +} + +static void _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t t0; diff --git a/lib/jit_riscv-sz.c b/lib/jit_riscv-sz.c index 887f8dc..c08e5bd 100644 --- a/lib/jit_riscv-sz.c +++ 
b/lib/jit_riscv-sz.c @@ -524,4 +524,8 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 4, /* hmulr */ + 16, /* hmuli */ + 4, /* hmulr_u */ + 16, /* hmuli_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c index 2a399a9..27b0c5a 100644 --- a/lib/jit_riscv.c +++ b/lib/jit_riscv.c @@ -1149,6 +1149,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c index 3fe3e07..23f7346 100644 --- a/lib/jit_s390-cpu.c +++ b/lib/jit_s390-cpu.c @@ -1023,6 +1023,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2) +# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0) +# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2) +# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0) # define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3) static void _qmulr(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t); @@ -2765,14 +2769,14 @@ _qmulr(jit_state_t *_jit, jit_int32_t reg; /* The only invalid condition is r0 == r1 */ jit_int32_t t2, t3, s2, s3; - if (r2 == r0 || r2 == r1) { + if ((r0 != JIT_NOREG && r2 == r0) || r2 == r1) { s2 = jit_get_reg(jit_class_gpr); t2 = rn(s2); movr(t2, r2); } else t2 = r2; - if (r3 == r0 || r3 == r1) { + if ((r0 != JIT_NOREG && r3 == r0) || r3 == r1) { s3 = jit_get_reg(jit_class_gpr); t3 = rn(s3); movr(t3, r3); @@ -2815,7 +2819,8 @@ _qmulr_u(jit_state_t *_jit, regno = jit_get_reg_pair(); movr(rn(regno) + 1, r2); MULU_(rn(regno), r3); - movr(r0, rn(regno) + 1); + if (r0 != JIT_NOREG) + movr(r0, rn(regno) + 1); movr(r1, rn(regno)); 
jit_unget_reg_pair(regno); } @@ -2829,7 +2834,8 @@ _qmuli_u(jit_state_t *_jit, movr(rn(regno) + 1, r2); movi(rn(regno), i0); MULU_(rn(regno), rn(regno)); - movr(r0, rn(regno) + 1); + if (r0 != JIT_NOREG) + movr(r0, rn(regno) + 1); movr(r1, rn(regno)); jit_unget_reg_pair(regno); } diff --git a/lib/jit_s390-sz.c b/lib/jit_s390-sz.c index c71b040..b8a87e8 100644 --- a/lib/jit_s390-sz.c +++ b/lib/jit_s390-sz.c @@ -524,6 +524,10 @@ 0, /* fnmai_d */ 10, /* fnmsr_d */ 0, /* fnmsi_d */ + 34, /* hmulr */ + 42, /* hmuli */ + 8, /* hmulr_u */ + 16, /* hmuli_u */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -1052,4 +1056,8 @@ 0, /* fnmai_d */ 10, /* fnmsr_d */ 0, /* fnmsi_d */ + 44, /* hmulr */ + 60, /* hmuli */ + 12, /* hmulr_u */ + 28, /* hmuli_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_s390.c b/lib/jit_s390.c index 858ea30..851d0d0 100644 --- a/lib/jit_s390.c +++ b/lib/jit_s390.c @@ -1115,6 +1115,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 3a86f9e..6562867 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -630,6 +630,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # endif # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2) +# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0) +# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2) +# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0) # if __WORDSIZE == 32 # define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) # define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) @@ -1633,6 +1637,8 @@ static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) { + if 
(r0 == JIT_NOREG) + r0 = r1; if (sign) SMUL(r2, r3, r0); else @@ -1646,6 +1652,8 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, { jit_int32_t reg; if (s13_p(i0)) { + if (r0 == JIT_NOREG) + r0 = r1; if (sign) SMULI(r2, i0, r0); else @@ -1698,7 +1706,8 @@ _qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, movr(_O0_REGNO, r3); movr(_O1_REGNO, r2); calli((jit_word_t)__llmul); - movr(r0, _O1_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _O1_REGNO); movr(r1, _O0_REGNO); QMUL_EPILOG(); } @@ -1711,7 +1720,8 @@ _qmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, movi(_O0_REGNO, i0); movr(_O1_REGNO, r2); calli((jit_word_t)__llmul); - movr(r0, _O1_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _O1_REGNO); movr(r1, _O0_REGNO); QMUL_EPILOG(); } @@ -1729,7 +1739,8 @@ _qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, movr(_O0_REGNO, r3); movr(_O1_REGNO, r2); calli((jit_word_t)__ullmul); - movr(r0, _O1_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _O1_REGNO); movr(r1, _O0_REGNO); QMUL_EPILOG(); } @@ -1742,7 +1753,8 @@ _qmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, movi(_O0_REGNO, i0); movr(_O1_REGNO, r2); calli((jit_word_t)__ullmul); - movr(r0, _O1_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _O1_REGNO); movr(r1, _O0_REGNO); QMUL_EPILOG(); } diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index a8aeb1c..8a4ce7b 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -524,6 +524,10 @@ 0, /* fnmai_d */ 4, /* fnmsr_d */ 0, /* fnmsi_d */ + 8, /* hmulr */ + 16, /* hmuli */ + 8, /* hmulr_u */ + 16, /* hmuli_u */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -1052,4 +1056,8 @@ 0, /* fnmai_d */ 20, /* fnmsr_d */ 0, /* fnmsi_d */ + 44, /* hmulr */ + 60, /* hmuli */ + 44, /* hmulr_u */ + 60, /* hmuli_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index 1acf636..bd8756d 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -1409,6 +1409,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); 
case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 47529cf..44f5b45 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -256,6 +256,10 @@ static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define hmulr(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 1) +# define hmulr_u(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 0) +# define hmuli(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 1) +# define hmuli_u(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 0) # define umulr(r0) unr(X86_IMUL, r0) # define umulr_u(r0) unr(X86_MUL, r0) # define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1) @@ -1525,14 +1529,20 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, else umulr_u(mul); - if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) - xchgr(_RAX_REGNO, _RDX_REGNO); + if (r0 != JIT_NOREG) { + if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) + xchgr(_RAX_REGNO, _RDX_REGNO); + else { + if (r0 != _RDX_REGNO) + movr(r0, _RAX_REGNO); + movr(r1, _RDX_REGNO); + if (r0 == _RDX_REGNO) + movr(r0, _RAX_REGNO); + } + } else { - if (r0 != _RDX_REGNO) - movr(r0, _RAX_REGNO); + assert(r1 != JIT_NOREG); movr(r1, _RDX_REGNO); - if (r0 == _RDX_REGNO) - movr(r0, _RAX_REGNO); } clear(_RDX_REGNO, _RDX); diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index a2b608c..99bb625 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -525,6 +525,10 @@ 0, /* fnmai_d */ 27, /* fnmsr_d */ 0, /* fnmsi_d */ + 18, /* hmulr */ + 23, /* hmuli */ + 18, /* hmulr_u */ + 23, /* hmuli_u */ #endif /* __X32 */ #if __X64 @@ -1054,6 +1058,10 @@ 0, /* fnmai_d */ 30, /* fnmsr_d */ 0, /* fnmsi_d */ + 17, /* hmulr */ + 27, /* hmuli */ + 
17, /* hmulr_u */ + 27, /* hmuli_u */ #else # if __X64_32 @@ -1582,6 +1590,10 @@ 0, /* fnmai_d */ 31, /* fnmsr_d */ 0, /* fnmsi_d */ + 15, /* hmulr */ + 21, /* hmuli */ + 15, /* hmulr_u */ + 21, /* hmuli_u */ #else #define JIT_INSTR_MAX 112 @@ -2109,6 +2121,10 @@ 0, /* fnmai_d */ 31, /* fnmsr_d */ 0, /* fnmsi_d */ + 17, /* hmulr */ + 27, /* hmuli */ + 17, /* hmulr_u */ + 27, /* hmuli_u */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/lib/jit_x86.c b/lib/jit_x86.c index fb5f3ca..dd4fccd 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -1798,6 +1798,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); diff --git a/lib/lightning.c b/lib/lightning.c index d1d8ffc..25a6078 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1551,7 +1551,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_addi: case jit_code_addxi: case jit_code_addci: case jit_code_subi: case jit_code_subxi: case jit_code_subci: case jit_code_rsbi: - case jit_code_muli: case jit_code_divi: case jit_code_divi_u: + case jit_code_muli: case jit_code_hmuli: case jit_code_hmuli_u: + case jit_code_divi: case jit_code_divi_u: case jit_code_remi: case jit_code_remi_u: case jit_code_andi: case jit_code_ori: case jit_code_xori: case jit_code_lshi: case jit_code_rshi: case jit_code_rshi_u: case jit_code_lroti: @@ -1592,7 +1593,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) break; case jit_code_addr: case jit_code_addxr: case jit_code_addcr: case jit_code_subr: case jit_code_subxr: case jit_code_subcr: - case jit_code_mulr: case jit_code_divr: case jit_code_divr_u: + case jit_code_mulr: case jit_code_hmulr: case jit_code_hmulr_u: + case jit_code_divr: case jit_code_divr_u: case jit_code_remr: case jit_code_remr_u: case jit_code_andr: case jit_code_orr: case jit_code_xorr: case 
jit_code_lshr: case jit_code_rshr: case jit_code_rshr_u: case jit_code_lrotr: |