lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
author pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-08-21 19:45:10 -0300
committer pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-08-21 19:45:10 -0300
commit 512f9c3ccd3ec43d95fb235040451558b816cfff (patch)
tree ac9afd08546379908922b86fff6639e85f49e19d
parent 3d72aba731677c1c262692a584bd41b6b12c792e (diff)
download lightning-512f9c3ccd3ec43d95fb235040451558b816cfff.tar.gz
Add back the jit_hmul interfaces
The jit_hmul interfaces existed in Lightning 1.x, but at first jit_qmul appeared to cover all usages, as hmul was used for computing the high bits of a complete multiplication. It turns out there might be other cases where only the top bits are really required; one example is division by constants. An optimized version that attempts to reduce the number of instructions used, when applicable, has now been added.

* check/Makefile.am, check/lightning.c: Add new hmul tests.
* doc/body.texi: Document hmul.
* include/lightning.h.in: Create the new hmul codes.
* lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, lib/jit_loongarch-cpu.c, lib/jit_loongarch-sz.c, lib/jit_loongarch.c, lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c, lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_riscv-cpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c, lib/jit_s390-cpu.c, lib/jit_s390-sz.c, lib/jit_s390.c, lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c: Implement hmul and update the *-sz.c files.
* lib/jit_names.c, lib/lightning.c: Add knowledge of hmul.
Diffstat
-rw-r--r--ChangeLog 20
-rw-r--r--check/Makefile.am 19
-rw-r--r--check/alu_hmul.ok 1
-rw-r--r--check/alu_hmul.tst 31
-rw-r--r--check/lightning.c 6
-rw-r--r--doc/body.texi 2
-rw-r--r--include/lightning.h.in 7
-rw-r--r--lib/jit_aarch64-cpu.c 26
-rw-r--r--lib/jit_aarch64-sz.c 9
-rw-r--r--lib/jit_aarch64.c 4
-rw-r--r--lib/jit_alpha-cpu.c 48
-rw-r--r--lib/jit_alpha-sz.c 4
-rw-r--r--lib/jit_alpha.c 4
-rw-r--r--lib/jit_arm-cpu.c 33
-rw-r--r--lib/jit_arm-sz.c 1078
-rw-r--r--lib/jit_arm.c 4
-rw-r--r--lib/jit_hppa-cpu.c 13
-rw-r--r--lib/jit_hppa-sz.c 4
-rw-r--r--lib/jit_hppa.c 4
-rw-r--r--lib/jit_ia64-cpu.c 26
-rw-r--r--lib/jit_ia64-sz.c 4
-rw-r--r--lib/jit_ia64.c 4
-rw-r--r--lib/jit_loongarch-cpu.c 26
-rw-r--r--lib/jit_loongarch-sz.c 4
-rw-r--r--lib/jit_loongarch.c 4
-rw-r--r--lib/jit_mips-cpu.c 50
-rw-r--r--lib/jit_mips-sz.c 8
-rw-r--r--lib/jit_mips.c 4
-rw-r--r--lib/jit_names.c 2
-rw-r--r--lib/jit_ppc-cpu.c 39
-rw-r--r--lib/jit_ppc-sz.c 16
-rw-r--r--lib/jit_ppc.c 4
-rw-r--r--lib/jit_riscv-cpu.c 26
-rw-r--r--lib/jit_riscv-sz.c 4
-rw-r--r--lib/jit_riscv.c 4
-rw-r--r--lib/jit_s390-cpu.c 14
-rw-r--r--lib/jit_s390-sz.c 8
-rw-r--r--lib/jit_s390.c 4
-rw-r--r--lib/jit_sparc-cpu.c 20
-rw-r--r--lib/jit_sparc-sz.c 8
-rw-r--r--lib/jit_sparc.c 4
-rw-r--r--lib/jit_x86-cpu.c 22
-rw-r--r--lib/jit_x86-sz.c 16
-rw-r--r--lib/jit_x86.c 4
-rw-r--r--lib/lightning.c 6
45 files changed, 1064 insertions, 584 deletions
diff --git a/ChangeLog b/ChangeLog
index 5013515..8a5588a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2023-08-21 Paulo Andrade <pcpa@gnu.org>
+
+ * check/Makefile.am, check/lightning.c: Add new hmul tests.
+ * doc/body.texi: Document hmul.
+ * include/lightning.h.in: Create the new hmul codes.
+ * lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+ lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c,
+ lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c,
+ lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c,
+ lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+ lib/jit_loongarch-cpu.c, lib/jit_loongarch-sz.c, lib/jit_loongarch.c,
+ lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c,
+ lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c,
+ lib/jit_riscv-cpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c,
+ lib/jit_s390-cpu.c, lib/jit_s390-sz.c, lib/jit_s390.c,
+ lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c,
+ lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c: Implement
+ hmul and update the *-sz.c files.
+ * lib/jit_names.c, lib/lightning.c: Add knowledge of hmul.
+
2023-04-18 Paulo Andrade <pcpa@gnu.org>
* include/lightning.h.in: Define new fmar_f, fmai_f, fmsr_f,
diff --git a/check/Makefile.am b/check/Makefile.am
index 7142340..1f086ef 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -106,6 +106,7 @@ EXTRA_DIST = \
alux_sub.tst alux_sub.ok \
alu_rsb.tst alu_rsb.ok \
alu_mul.tst alu_mul.ok \
+ alu_hmul.tst alu_hmul.ok \
alu_div.tst alu_div.ok \
alu_rem.tst alu_rem.ok \
alu_and.tst alu_and.ok \
@@ -164,7 +165,8 @@ base_TESTS = \
ext cvt hton bswap branch \
alu_add alux_add \
alu_sub alux_sub alu_rsb \
- alu_mul alu_div alu_rem \
+ alu_mul alu_hmul \
+ alu_div alu_rem \
alu_and alu_or alu_xor \
alu_lsh alu_rsh \
alu_com alu_neg alu_rot \
@@ -194,7 +196,8 @@ x87_TESTS = \
ext.x87 cvt.x87 branch.x87 \
alu_add.x87 alux_add.x87 \
alu_sub.x87 alux_sub.x87 alu_rsb.x87 \
- alu_mul.x87 alu_div.x87 alu_rem.x87 \
+ alu_mul.x87 alu_hmul.x87 \
+ alu_div.x87 alu_rem.x87 \
alu_and.x87 alu_or.x87 alu_xor.x87 \
alu_lsh.x87 alu_rsh.x87 alu_rot.x87 \
alu_com.x87 alu_neg.x87 \
@@ -218,7 +221,8 @@ x87_nodata_TESTS = \
ext.x87.nodata cvt.x87.nodata branch.x87.nodata \
alu_add.x87.nodata alux_add.x87.nodata \
alu_sub.x87.nodata alux_sub.x87.nodata alu_rsb.x87.nodata \
- alu_mul.x87.nodata alu_div.x87.nodata alu_rem.x87.nodata \
+ alu_mul.x87.nodata alu_hmul.x87.nodata \
+ alu_div.x87.nodata alu_rem.x87.nodata \
alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata \
alu_lsh.x87.nodata alu_rsh.x87.nodata alu_rot.x87.nodata \
alu_com.x87.nodata alu_neg.x87.nodata \
@@ -244,7 +248,8 @@ arm_TESTS = \
ext.arm cvt.arm hton.arm bswap.arm \
branch.arm alu_add.arm alux_add.arm \
alu_sub.arm alux_sub.arm alu_rsb.arm \
- alu_mul.arm alu_div.arm alu_rem.arm \
+ alu_mul.arm alu_hmul.arm \
+ alu_div.arm alu_rem.arm \
alu_and.arm alu_or.arm alu_xor.arm \
alu_lsh.arm alu_rsh.arm alu_rot.arm \
alu_com.arm alu_neg.arm \
@@ -271,7 +276,8 @@ swf_TESTS = \
ext.swf cvt.swf hton.swf bswap.swf \
branch.swf alu_add.swf alux_add.swf \
alu_sub.swf alux_sub.swf alu_rsb.swf \
- alu_mul.swf alu_div.swf alu_rem.swf \
+ alu_mul.swf alu_hmul.swf \
+ alu_div.swf alu_rem.swf \
alu_and.swf alu_or.swf alu_xor.swf \
alu_lsh.swf alu_rsh.swf alu_rot.swf \
alu_com.swf alu_neg.swf \
@@ -351,7 +357,8 @@ nodata_TESTS = \
ext.nodata cvt.nodata branch.nodata \
alu_add.nodata alux_add.nodata \
alu_sub.nodata alux_sub.nodata alu_rsb.nodata \
- alu_mul.nodata alu_div.nodata alu_rem.nodata \
+ alu_mul.nodata alu_hmul.nodata \
+ alu_div.nodata alu_rem.nodata \
alu_and.nodata alu_or.nodata alu_xor.nodata \
alu_lsh.nodata alu_rsh.nodata alu_rot.nodata \
alu_com.nodata alu_neg.nodata \
diff --git a/check/alu_hmul.ok b/check/alu_hmul.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_hmul.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_hmul.tst b/check/alu_hmul.tst
new file mode 100644
index 0000000..ef47a4d
--- /dev/null
+++ b/check/alu_hmul.tst
@@ -0,0 +1,31 @@
+#include "alu.inc"
+
+.code
+ prolog
+#define HMUL(N, I0, I1, V) ALU(N, , hmul, I0, I1, V)
+#define UHMUL(N, I0, I1, V) ALU(N, _u, hmul, I0, I1, V)
+ HMUL(0, -2, -1, 0)
+ HMUL(1, 0, -1, 0)
+ HMUL(2, -1, 0, 0)
+ HMUL(3, 1, -1, -1)
+#if __WORDSIZE == 32
+ HMUL(4, 0x7ffff, 0x7ffff, 0x3f)
+ UHMUL(5, 0xffffff, 0xffffff, 0xffff)
+ HMUL(6, 0x80000000, -2, 1)
+ HMUL(7, 0x80000000, 2, -1)
+ HMUL(8, 0x80000001, 3, -2)
+ HMUL(9, 0x80000001, -3, 1)
+#else
+ HMUL(4, 0x7ffffffff, 0x7ffffffff, 0x3f)
+ UHMUL(5, 0xffffffffff, 0xffffffffff, 0xffff)
+ HMUL(6, 0x8000000000000000, -2, 1)
+ HMUL(7, 0x8000000000000000, 2, -1)
+ HMUL(8, 0x8000000000000001, 3, -2)
+ HMUL(9, 0x8000000000000001, -3, 1)
+#endif
+ prepare
+ pushargi ok
+ ellipsis
+ finishi @printf
+ ret
+ epilog
diff --git a/check/lightning.c b/check/lightning.c
index f04ecd8..42a6ed8 100644
--- a/check/lightning.c
+++ b/check/lightning.c
@@ -306,6 +306,8 @@ static void subxr(void); static void subxi(void);
static void subcr(void); static void subci(void);
static void rsbr(void); static void rsbi(void);
static void mulr(void); static void muli(void);
+static void hmulr(void); static void hmuli(void);
+static void hmulr_u(void); static void hmuli_u(void);
static void qmulr(void); static void qmuli(void);
static void qmulr_u(void); static void qmuli_u(void);
static void divr(void); static void divi(void);
@@ -709,6 +711,8 @@ static instr_t instr_vector[] = {
entry(subcr), entry(subci),
entry(rsbr), entry(rsbi),
entry(mulr), entry(muli),
+ entry(hmulr), entry(hmuli),
+ entry(hmulr_u), entry(hmuli_u),
entry(qmulr), entry(qmuli),
entry(qmulr_u), entry(qmuli_u),
entry(divr), entry(divi),
@@ -1723,6 +1727,8 @@ entry_ir_ir_ir(subxr) entry_ir_ir_im(subxi)
entry_ir_ir_ir(subcr) entry_ir_ir_im(subci)
entry_ir_ir_ir(rsbr) entry_ir_ir_im(rsbi)
entry_ir_ir_ir(mulr) entry_ir_ir_im(muli)
+entry_ir_ir_ir(hmulr) entry_ir_ir_im(hmuli)
+entry_ir_ir_ir(hmulr_u) entry_ir_ir_im(hmuli_u)
entry_ir_ir_ir_ir(qmulr) entry_ir_ir_ir_im(qmuli)
entry_ir_ir_ir_ir(qmulr_u) entry_ir_ir_ir_im(qmuli_u)
entry_ir_ir_ir(divr) entry_ir_ir_im(divi)
diff --git a/doc/body.texi b/doc/body.texi
index cb47139..f71b77c 100644
--- a/doc/body.texi
+++ b/doc/body.texi
@@ -247,6 +247,8 @@ rsbr _f _d O1 = O3 - O1
rsbi _f _d O1 = O3 - O1
mulr _f _d O1 = O2 * O3
muli _f _d O1 = O2 * O3
+hmulr _u O1 = ((O2 * O3) >> WORDSIZE)
+hmuli _u O1 = ((O2 * O3) >> WORDSIZE)
divr _u _f _d O1 = O2 / O3
divi _u _f _d O1 = O2 / O3
remr _u O1 = O2 % O3
diff --git a/include/lightning.h.in b/include/lightning.h.in
index a4ef49f..6d51235 100644
--- a/include/lightning.h.in
+++ b/include/lightning.h.in
@@ -1203,6 +1203,13 @@ typedef enum {
#define jit_fnmsi_d(u,v,w,x) _jit_fnmsi_d(_jit, u, v, w, x)
jit_code_fnmsr_d, jit_code_fnmsi_d,
+#define jit_hmulr(u,v,w) jit_new_node_www(jit_code_hmulr,u,v,w)
+#define jit_hmuli(u,v,w) jit_new_node_www(jit_code_hmuli,u,v,w)
+ jit_code_hmulr, jit_code_hmuli,
+#define jit_hmulr_u(u,v,w) jit_new_node_www(jit_code_hmulr_u,u,v,w)
+#define jit_hmuli_u(u,v,w) jit_new_node_www(jit_code_hmuli_u,u,v,w)
+ jit_code_hmulr_u, jit_code_hmuli_u,
+
jit_code_last_code
} jit_code_t;
diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c
index d414cec..76a988b 100644
--- a/lib/jit_aarch64-cpu.c
+++ b/lib/jit_aarch64-cpu.c
@@ -555,6 +555,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define mulr(r0,r1,r2) MUL(r0,r1,r2)
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0,r1,r2) SMULH(r0,r1,r2)
+# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr_u(r0,r1,r2) UMULH(r0,r1,r2)
+# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
@@ -1229,6 +1235,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr_u(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
_qmulr(jit_state_t *_jit, jit_int32_t r0,
jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c
index 0a7bc2d..435bbe9 100644
--- a/lib/jit_aarch64-sz.c
+++ b/lib/jit_aarch64-sz.c
@@ -526,7 +526,12 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 16, /* hmuli */
+ 4, /* hmulr_u */
+ 16, /* hmuli_u */
# else /* PACKED_STACK */
+
#define JIT_INSTR_MAX 120
0, /* data */
0, /* live */
@@ -1052,5 +1057,9 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 16, /* hmuli */
+ 4, /* hmulr_u */
+ 16, /* hmuli_u */
# endif
#endif /* __WORDSIZE */
diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c
index cd4e79c..bc78800 100644
--- a/lib/jit_aarch64.c
+++ b/lib/jit_aarch64.c
@@ -1426,6 +1426,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c
index 7791063..b977214 100644
--- a/lib/jit_alpha-cpu.c
+++ b/lib/jit_alpha-cpu.c
@@ -362,6 +362,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t);
# define mulr(r0,r1,r2) MULQ(r1,r2,r0)
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2)
+# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0)
+# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2)
+# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0)
# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
@@ -1082,14 +1086,14 @@ _qmulr(jit_state_t *_jit, jit_int32_t r0,
jit_int32_t reg;
/* The only invalid condition is r0 == r1 */
jit_int32_t t2, t3, s2, s3;
- if (r2 == r0 || r2 == r1) {
+ if ((r0 != JIT_NOREG && r2 == r0) || r2 == r1) {
s2 = jit_get_reg(jit_class_gpr);
t2 = rn(s2);
movr(t2, r2);
}
else
t2 = r2;
- if (r3 == r0 || r3 == r1) {
+ if ((r0 != JIT_NOREG && r3 == r0) || r3 == r1) {
s3 = jit_get_reg(jit_class_gpr);
t3 = rn(s3);
movr(t3, r3);
@@ -1129,16 +1133,20 @@ _qmulr_u(jit_state_t *_jit, jit_int32_t r0,
jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
jit_int32_t reg;
- if (r0 == r2 || r0 == r3) {
- reg = jit_get_reg(jit_class_gpr);
- mulr(rn(reg), r2, r3);
+ if (r0 != JIT_NOREG) {
+ if (r0 == r2 || r0 == r3) {
+ reg = jit_get_reg(jit_class_gpr);
+ mulr(rn(reg), r2, r3);
+ }
+ else
+ mulr(r0, r2, r3);
}
- else
- mulr(r0, r2, r3);
UMULH(r2, r3, r1);
- if (r0 == r2 || r0 == r3) {
- movr(r0, rn(reg));
- jit_unget_reg(reg);
+ if (r0 != JIT_NOREG) {
+ if (r0 == r2 || r0 == r3) {
+ movr(r0, rn(reg));
+ jit_unget_reg(reg);
+ }
}
}
@@ -1148,16 +1156,20 @@ _qmuli_u(jit_state_t *_jit, jit_int32_t r0,
{
jit_int32_t reg;
if (_u8_p(i0)) {
- if (r0 == r2) {
- reg = jit_get_reg(jit_class_gpr);
- muli(rn(reg), r2, i0);
+ if (r0 != JIT_NOREG) {
+ if (r0 == r2) {
+ reg = jit_get_reg(jit_class_gpr);
+ muli(rn(reg), r2, i0);
+ }
+ else
+ muli(r0, r2, i0);
}
- else
- muli(r0, r2, i0);
UMULHi(r2, i0, r1);
- if (r0 == r2) {
- movr(r0, rn(reg));
- jit_unget_reg(reg);
+ if (r0 != JIT_NOREG) {
+ if (r0 == r2) {
+ movr(r0, rn(reg));
+ jit_unget_reg(reg);
+ }
}
}
else {
diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c
index b6ea741..fd39c0d 100644
--- a/lib/jit_alpha-sz.c
+++ b/lib/jit_alpha-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
20, /* fnmsr_d */
0, /* fnmsi_d */
+ 36, /* hmulr */
+ 60, /* hmuli */
+ 4, /* hmulr_u */
+ 28, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c
index d2d378f..69bf397 100644
--- a/lib/jit_alpha.c
+++ b/lib/jit_alpha.c
@@ -1019,6 +1019,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index 9e19434..149db9a 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -984,6 +984,16 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0,r1,r2) ihmulr(r0,r1,r2,1)
+# define hmulr_u(r0,r1,r2) ihmulr(r0,r1,r2,0)
+# define ihmulr(r0,r1,r2,cc) _ihmulr(_jit,r0,r1,r2,cc)
+static void _ihmulr(jit_state_t*,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_bool_t);
+# define hmuli(r0,r1,i0) ihmuli(r0,r1,i0,1)
+# define hmuli_u(r0,r1,i0) ihmuli(r0,r1,i0,0)
+# define ihmuli(r0,r1,i0,cc) _ihmuli(_jit,r0,r1,i0,cc)
+static void _ihmuli(jit_state_t*,jit_int32_t,jit_int32_t,
+ jit_word_t,jit_bool_t);
# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1)
# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0)
# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc)
@@ -2322,6 +2332,29 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_ihmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_bool_t sign)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ iqmulr(rn(reg), r0, r1, r2, sign);
+ jit_unget_reg(reg);
+}
+
+static void
+_ihmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_word_t i0, jit_bool_t sign)
+{
+ jit_int32_t t0, t1;
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ movi(rn(t1), i0);
+ iqmulr(rn(t0), r0, r1, rn(t1), sign);
+ jit_unget_reg(t1);
+ jit_unget_reg(t0);
+}
+
+static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c
index 31931e4..7ec5e9e 100644
--- a/lib/jit_arm-sz.c
+++ b/lib/jit_arm-sz.c
@@ -4,14 +4,14 @@
#define JIT_INSTR_MAX 144
0, /* data */
0, /* live */
- 20, /* align */
+ 20, /* align */
0, /* save */
0, /* load */
4, /* skip */
2, /* #name */
0, /* #note */
0, /* label */
- 26, /* prolog */
+ 30, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
@@ -43,45 +43,45 @@
0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
- 16, /* va_arg_d */
+ 28, /* va_arg_d */
0, /* va_end */
4, /* addr */
- 12, /* addi */
+ 12, /* addi */
4, /* addcr */
8, /* addci */
4, /* addxr */
4, /* addxi */
4, /* subr */
- 12, /* subi */
+ 12, /* subi */
4, /* subcr */
8, /* subci */
4, /* subxr */
4, /* subxi */
- 16, /* rsbi */
- 4, /* mulr */
- 12, /* muli */
+ 16, /* rsbi */
+ 8, /* mulr */
+ 12, /* muli */
4, /* qmulr */
- 12, /* qmuli */
+ 12, /* qmuli */
4, /* qmulr_u */
8, /* qmuli_u */
- 32, /* divr */
- 36, /* divi */
- 24, /* divr_u */
- 28, /* divi_u */
- 18, /* qdivr */
- 22, /* qdivi */
- 18, /* qdivr_u */
- 22, /* qdivi_u */
- 24, /* remr */
- 32, /* remi */
- 24, /* remr_u */
- 28, /* remi_u */
+ 32, /* divr */
+ 36, /* divi */
+ 24, /* divr_u */
+ 28, /* divi_u */
+ 18, /* qdivr */
+ 22, /* qdivi */
+ 18, /* qdivr_u */
+ 22, /* qdivi_u */
+ 24, /* remr */
+ 32, /* remi */
+ 24, /* remr_u */
+ 28, /* remi_u */
4, /* andr */
- 12, /* andi */
+ 12, /* andi */
4, /* orr */
- 12, /* ori */
+ 12, /* ori */
4, /* xorr */
- 12, /* xori */
+ 12, /* xori */
4, /* lshr */
4, /* lshi */
4, /* rshr */
@@ -92,98 +92,98 @@
4, /* negi */
4, /* comr */
4, /* comi */
- 14, /* ltr */
- 14, /* lti */
- 14, /* ltr_u */
- 14, /* lti_u */
- 14, /* ler */
- 14, /* lei */
- 14, /* ler_u */
- 14, /* lei_u */
- 14, /* eqr */
- 14, /* eqi */
- 14, /* ger */
- 14, /* gei */
- 14, /* ger_u */
- 14, /* gei_u */
- 14, /* gtr */
- 14, /* gti */
- 14, /* gtr_u */
- 14, /* gti_u */
- 14, /* ner */
- 14, /* nei */
+ 14, /* ltr */
+ 14, /* lti */
+ 14, /* ltr_u */
+ 14, /* lti_u */
+ 14, /* ler */
+ 14, /* lei */
+ 14, /* ler_u */
+ 14, /* lei_u */
+ 14, /* eqr */
+ 14, /* eqi */
+ 14, /* ger */
+ 14, /* gei */
+ 14, /* ger_u */
+ 14, /* gei_u */
+ 14, /* gtr */
+ 14, /* gti */
+ 14, /* gtr_u */
+ 14, /* gti_u */
+ 14, /* ner */
+ 14, /* nei */
4, /* movr */
8, /* movi */
8, /* movnr */
8, /* movzr */
- 42, /* casr */
- 50, /* casi */
- 4, /* extr_c */
+ 42, /* casr */
+ 46, /* casi */
+ 8, /* extr_c */
4, /* exti_c */
4, /* extr_uc */
4, /* exti_uc */
- 4, /* extr_s */
+ 8, /* extr_s */
4, /* exti_s */
- 4, /* extr_us */
+ 8, /* extr_us */
4, /* exti_us */
0, /* extr_i */
0, /* exti_i */
0, /* extr_ui */
0, /* exti_ui */
- 8, /* bswapr_us */
+ 20, /* bswapr_us */
4, /* bswapi_us */
- 4, /* bswapr_ui */
+ 16, /* bswapr_ui */
8, /* bswapi_ui */
0, /* bswapr_ul */
0, /* bswapi_ul */
- 8, /* htonr_us */
+ 20, /* htonr_us */
4, /* htoni_us */
- 4, /* htonr_ui */
+ 16, /* htonr_ui */
8, /* htoni_ui */
0, /* htonr_ul */
0, /* htoni_ul */
4, /* ldr_c */
- 12, /* ldi_c */
+ 12, /* ldi_c */
4, /* ldr_uc */
- 12, /* ldi_uc */
+ 12, /* ldi_uc */
4, /* ldr_s */
- 12, /* ldi_s */
+ 12, /* ldi_s */
4, /* ldr_us */
- 12, /* ldi_us */
+ 12, /* ldi_us */
4, /* ldr_i */
- 12, /* ldi_i */
+ 12, /* ldi_i */
0, /* ldr_ui */
0, /* ldi_ui */
0, /* ldr_l */
0, /* ldi_l */
4, /* ldxr_c */
- 12, /* ldxi_c */
+ 12, /* ldxi_c */
4, /* ldxr_uc */
- 12, /* ldxi_uc */
+ 12, /* ldxi_uc */
4, /* ldxr_s */
- 12, /* ldxi_s */
+ 12, /* ldxi_s */
4, /* ldxr_us */
- 12, /* ldxi_us */
+ 12, /* ldxi_us */
4, /* ldxr_i */
- 12, /* ldxi_i */
+ 12, /* ldxi_i */
0, /* ldxr_ui */
0, /* ldxi_ui */
0, /* ldxr_l */
0, /* ldxi_l */
4, /* str_c */
- 12, /* sti_c */
+ 12, /* sti_c */
4, /* str_s */
- 12, /* sti_s */
+ 12, /* sti_s */
4, /* str_i */
- 12, /* sti_i */
+ 12, /* sti_i */
0, /* str_l */
0, /* sti_l */
4, /* stxr_c */
- 12, /* stxi_c */
+ 12, /* stxi_c */
4, /* stxr_s */
- 12, /* stxi_s */
+ 12, /* stxi_s */
4, /* stxr_i */
- 12, /* stxi_i */
+ 12, /* stxi_i */
0, /* stxr_l */
0, /* stxi_l */
8, /* bltr */
@@ -195,7 +195,7 @@
8, /* bler_u */
8, /* blei_u */
8, /* beqr */
- 16, /* beqi */
+ 16, /* beqi */
8, /* bger */
8, /* bgei */
8, /* bger_u */
@@ -205,7 +205,7 @@
8, /* bgtr_u */
8, /* bgti_u */
8, /* bner */
- 16, /* bnei */
+ 16, /* bnei */
8, /* bmsr */
8, /* bmsi */
8, /* bmcr */
@@ -226,10 +226,10 @@
8, /* bxsubi */
8, /* bxsubr_u */
8, /* bxsubi_u */
- 4, /* jmpr */
- 8, /* jmpi */
+ 12, /* jmpr */
+ 72, /* jmpi */
4, /* callr */
- 20, /* calli */
+ 20, /* calli */
0, /* prepare */
0, /* pushargr_c */
0, /* pushargi_c */
@@ -269,96 +269,96 @@
0, /* retval_i */
0, /* retval_ui */
0, /* retval_l */
- 16, /* epilog */
+ 276, /* epilog */
0, /* arg_f */
0, /* getarg_f */
0, /* putargr_f */
0, /* putargi_f */
- 4, /* addr_f */
- 8, /* addi_f */
- 4, /* subr_f */
- 8, /* subi_f */
- 8, /* rsbi_f */
- 4, /* mulr_f */
- 8, /* muli_f */
- 4, /* divr_f */
- 8, /* divi_f */
- 4, /* negr_f */
+ 24, /* addr_f */
+ 24, /* addi_f */
+ 24, /* subr_f */
+ 24, /* subi_f */
+ 24, /* rsbi_f */
+ 24, /* mulr_f */
+ 24, /* muli_f */
+ 24, /* divr_f */
+ 24, /* divi_f */
+ 12, /* negr_f */
0, /* negi_f */
- 4, /* absr_f */
+ 12, /* absr_f */
0, /* absi_f */
- 4, /* sqrtr_f */
+ 20, /* sqrtr_f */
0, /* sqrti_f */
- 18, /* ltr_f */
- 30, /* lti_f */
- 20, /* ler_f */
- 32, /* lei_f */
- 18, /* eqr_f */
- 30, /* eqi_f */
- 18, /* ger_f */
- 30, /* gei_f */
- 18, /* gtr_f */
- 30, /* gti_f */
- 18, /* ner_f */
- 30, /* nei_f */
- 18, /* unltr_f */
- 30, /* unlti_f */
- 18, /* unler_f */
- 30, /* unlei_f */
- 24, /* uneqr_f */
- 36, /* uneqi_f */
- 18, /* unger_f */
- 30, /* ungei_f */
- 18, /* ungtr_f */
- 30, /* ungti_f */
- 24, /* ltgtr_f */
- 36, /* ltgti_f */
- 18, /* ordr_f */
- 30, /* ordi_f */
- 18, /* unordr_f */
- 30, /* unordi_f */
- 8, /* truncr_f_i */
+ 24, /* ltr_f */
+ 30, /* lti_f */
+ 24, /* ler_f */
+ 32, /* lei_f */
+ 24, /* eqr_f */
+ 30, /* eqi_f */
+ 24, /* ger_f */
+ 30, /* gei_f */
+ 24, /* gtr_f */
+ 30, /* gti_f */
+ 28, /* ner_f */
+ 32, /* nei_f */
+ 56, /* unltr_f */
+ 64, /* unlti_f */
+ 56, /* unler_f */
+ 64, /* unlei_f */
+ 56, /* uneqr_f */
+ 64, /* uneqi_f */
+ 56, /* unger_f */
+ 64, /* ungei_f */
+ 56, /* ungtr_f */
+ 64, /* ungti_f */
+ 60, /* ltgtr_f */
+ 68, /* ltgti_f */
+ 28, /* ordr_f */
+ 32, /* ordi_f */
+ 56, /* unordr_f */
+ 64, /* unordi_f */
+ 20, /* truncr_f_i */
0, /* truncr_f_l */
- 8, /* extr_f */
- 4, /* extr_d_f */
- 4, /* movr_f */
- 12, /* movi_f */
- 4, /* ldr_f */
- 12, /* ldi_f */
+ 28, /* extr_f */
+ 22, /* extr_d_f */
+ 8, /* movr_f */
+ 16, /* movi_f */
+ 8, /* ldr_f */
+ 16, /* ldi_f */
8, /* ldxr_f */
- 16, /* ldxi_f */
- 4, /* str_f */
- 12, /* sti_f */
+ 16, /* ldxi_f */
+ 8, /* str_f */
+ 16, /* sti_f */
8, /* stxr_f */
- 16, /* stxi_f */
- 12, /* bltr_f */
- 24, /* blti_f */
- 12, /* bler_f */
- 24, /* blei_f */
- 12, /* beqr_f */
- 24, /* beqi_f */
- 12, /* bger_f */
- 24, /* bgei_f */
- 12, /* bgtr_f */
- 24, /* bgti_f */
- 12, /* bner_f */
- 24, /* bnei_f */
- 16, /* bunltr_f */
- 28, /* bunlti_f */
- 16, /* bunler_f */
- 28, /* bunlei_f */
- 20, /* buneqr_f */
- 32, /* buneqi_f */
- 16, /* bunger_f */
- 28, /* bungei_f */
- 12, /* bungtr_f */
- 24, /* bungti_f */
- 20, /* bltgtr_f */
- 32, /* bltgti_f */
- 12, /* bordr_f */
- 24, /* bordi_f */
- 12, /* bunordr_f */
- 24, /* bunordi_f */
+ 16, /* stxi_f */
+ 28, /* bltr_f */
+ 32, /* blti_f */
+ 28, /* bler_f */
+ 32, /* blei_f */
+ 28, /* beqr_f */
+ 48, /* beqi_f */
+ 28, /* bger_f */
+ 32, /* bgei_f */
+ 28, /* bgtr_f */
+ 32, /* bgti_f */
+ 28, /* bner_f */
+ 32, /* bnei_f */
+ 28, /* bunltr_f */
+ 32, /* bunlti_f */
+ 28, /* bunler_f */
+ 32, /* bunlei_f */
+ 60, /* buneqr_f */
+ 68, /* buneqi_f */
+ 28, /* bunger_f */
+ 32, /* bungei_f */
+ 28, /* bungtr_f */
+ 32, /* bungti_f */
+ 60, /* bltgtr_f */
+ 68, /* bltgti_f */
+ 28, /* bordr_f */
+ 32, /* bordi_f */
+ 28, /* bunordr_f */
+ 32, /* bunordi_f */
0, /* pushargr_f */
0, /* pushargi_f */
0, /* retr_f */
@@ -368,91 +368,91 @@
0, /* getarg_d */
0, /* putargr_d */
0, /* putargi_d */
- 4, /* addr_d */
- 20, /* addi_d */
- 4, /* subr_d */
- 20, /* subi_d */
- 20, /* rsbi_d */
- 4, /* mulr_d */
- 20, /* muli_d */
- 4, /* divr_d */
- 20, /* divi_d */
- 4, /* negr_d */
+ 34, /* addr_d */
+ 36, /* addi_d */
+ 34, /* subr_d */
+ 36, /* subi_d */
+ 36, /* rsbi_d */
+ 34, /* mulr_d */
+ 36, /* muli_d */
+ 34, /* divr_d */
+ 36, /* divi_d */
+ 20, /* negr_d */
0, /* negi_d */
- 4, /* absr_d */
+ 20, /* absr_d */
0, /* absi_d */
- 4, /* sqrtr_d */
+ 26, /* sqrtr_d */
0, /* sqrti_d */
- 18, /* ltr_d */
- 34, /* lti_d */
- 20, /* ler_d */
- 36, /* lei_d */
- 18, /* eqr_d */
- 34, /* eqi_d */
- 18, /* ger_d */
- 34, /* gei_d */
- 18, /* gtr_d */
- 34, /* gti_d */
- 18, /* ner_d */
- 34, /* nei_d */
- 18, /* unltr_d */
- 34, /* unlti_d */
- 18, /* unler_d */
- 34, /* unlei_d */
- 24, /* uneqr_d */
- 40, /* uneqi_d */
- 18, /* unger_d */
- 34, /* ungei_d */
- 18, /* ungtr_d */
- 34, /* ungti_d */
- 24, /* ltgtr_d */
- 40, /* ltgti_d */
- 18, /* ordr_d */
- 34, /* ordi_d */
- 18, /* unordr_d */
- 34, /* unordi_d */
- 8, /* truncr_d_i */
+ 28, /* ltr_d */
+ 34, /* lti_d */
+ 28, /* ler_d */
+ 36, /* lei_d */
+ 28, /* eqr_d */
+ 34, /* eqi_d */
+ 28, /* ger_d */
+ 34, /* gei_d */
+ 28, /* gtr_d */
+ 34, /* gti_d */
+ 32, /* ner_d */
+ 36, /* nei_d */
+ 66, /* unltr_d */
+ 72, /* unlti_d */
+ 66, /* unler_d */
+ 72, /* unlei_d */
+ 66, /* uneqr_d */
+ 72, /* uneqi_d */
+ 66, /* unger_d */
+ 72, /* ungei_d */
+ 66, /* ungtr_d */
+ 72, /* ungti_d */
+ 70, /* ltgtr_d */
+ 76, /* ltgti_d */
+ 32, /* ordr_d */
+ 36, /* ordi_d */
+ 66, /* unordr_d */
+ 72, /* unordi_d */
+ 20, /* truncr_d_i */
0, /* truncr_d_l */
- 8, /* extr_d */
- 4, /* extr_f_d */
- 4, /* movr_d */
- 32, /* movi_d */
- 4, /* ldr_d */
- 12, /* ldi_d */
- 8, /* ldxr_d */
- 16, /* ldxi_d */
- 4, /* str_d */
- 12, /* sti_d */
- 8, /* stxr_d */
- 16, /* stxi_d */
- 12, /* bltr_d */
- 28, /* blti_d */
- 12, /* bler_d */
- 28, /* blei_d */
- 12, /* beqr_d */
- 36, /* beqi_d */
- 12, /* bger_d */
- 28, /* bgei_d */
- 12, /* bgtr_d */
- 28, /* bgti_d */
- 12, /* bner_d */
- 28, /* bnei_d */
- 16, /* bunltr_d */
- 32, /* bunlti_d */
- 16, /* bunler_d */
- 32, /* bunlei_d */
- 20, /* buneqr_d */
- 36, /* buneqi_d */
- 16, /* bunger_d */
- 32, /* bungei_d */
- 12, /* bungtr_d */
- 28, /* bungti_d */
- 20, /* bltgtr_d */
- 36, /* bltgti_d */
- 12, /* bordr_d */
- 28, /* bordi_d */
- 12, /* bunordr_d */
- 28, /* bunordi_d */
+ 36, /* extr_d */
+ 22, /* extr_f_d */
+ 16, /* movr_d */
+ 32, /* movi_d */
+ 16, /* ldr_d */
+ 24, /* ldi_d */
+ 20, /* ldxr_d */
+ 28, /* ldxi_d */
+ 16, /* str_d */
+ 24, /* sti_d */
+ 20, /* stxr_d */
+ 28, /* stxi_d */
+ 32, /* bltr_d */
+ 36, /* blti_d */
+ 32, /* bler_d */
+ 36, /* blei_d */
+ 32, /* beqr_d */
+ 52, /* beqi_d */
+ 32, /* bger_d */
+ 36, /* bgei_d */
+ 32, /* bgtr_d */
+ 36, /* bgti_d */
+ 32, /* bner_d */
+ 36, /* bnei_d */
+ 32, /* bunltr_d */
+ 36, /* bunlti_d */
+ 32, /* bunler_d */
+ 36, /* bunlei_d */
+ 68, /* buneqr_d */
+ 76, /* buneqi_d */
+ 32, /* bunger_d */
+ 36, /* bungei_d */
+ 32, /* bungtr_d */
+ 36, /* bungti_d */
+ 68, /* bltgtr_d */
+ 76, /* bltgti_d */
+ 32, /* bordr_d */
+ 36, /* bordi_d */
+ 32, /* bunordr_d */
+ 36, /* bunordi_d */
0, /* pushargr_d */
0, /* pushargi_d */
0, /* retr_d */
@@ -460,72 +460,76 @@
0, /* retval_d */
4, /* movr_w_f */
8, /* movi_w_f */
- 4, /* movr_ww_d */
- 16, /* movi_ww_d */
+ 8, /* movr_ww_d */
+ 20, /* movi_ww_d */
0, /* movr_w_d */
0, /* movi_w_d */
4, /* movr_f_w */
- 4, /* movi_f_w */
- 4, /* movr_d_ww */
- 12, /* movi_d_ww */
+ 8, /* movi_f_w */
+ 8, /* movr_d_ww */
+ 12, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
8, /* clor */
4, /* cloi */
4, /* clzr */
4, /* clzi */
- 12, /* ctor */
+ 12, /* ctor */
4, /* ctoi */
8, /* ctzr */
4, /* ctzi */
4, /* rbitr */
8, /* rbiti */
- 40, /* popcntr */
+ 40, /* popcntr */
4, /* popcnti */
- 12, /* lrotr */
+ 12, /* lrotr */
4, /* lroti */
4, /* rrotr */
4, /* rroti */
- 4, /* extr */
+ 8, /* extr */
4, /* exti */
- 4, /* extr_u */
+ 12, /* extr_u */
4, /* exti_u */
- 4, /* depr */
- 8, /* depi */
- 50, /* qlshr */
+ 24, /* depr */
+ 20, /* depi */
+ 50, /* qlshr */
8, /* qlshi */
- 50, /* qlshr_u */
+ 50, /* qlshr_u */
8, /* qlshi_u */
- 50, /* qrshr */
+ 50, /* qrshr */
8, /* qrshi */
- 50, /* qrshr_u */
+ 50, /* qrshr_u */
8, /* qrshi_u */
- 72, /* unldr */
- 44, /* unldi */
- 72, /* unldr_u */
- 44, /* unldi_u */
- 68, /* unstr */
- 44, /* unsti */
- 140, /* unldr_x */
- 76, /* unldi_x */
- 144, /* unstr_x */
- 92, /* unsti_x */
- 8, /* fmar_f */
+ 72, /* unldr */
+ 44, /* unldi */
+ 72, /* unldr_u */
+ 44, /* unldi_u */
+ 68, /* unstr */
+ 44, /* unsti */
+ 144, /* unldr_x */
+ 80, /* unldi_x */
+ 148, /* unstr_x */
+ 96, /* unsti_x */
+ 48, /* fmar_f */
0, /* fmai_f */
- 8, /* fmsr_f */
+ 48, /* fmsr_f */
0, /* fmsi_f */
- 8, /* fmar_d */
+ 68, /* fmar_d */
0, /* fmai_d */
- 8, /* fmsr_d */
+ 68, /* fmsr_d */
0, /* fmsi_d */
- 12, /* fnmar_f */
+ 60, /* fnmar_f */
0, /* fnmai_f */
- 12, /* fnmsr_f */
+ 60, /* fnmsr_f */
0, /* fnmsi_f */
- 12, /* fnmar_d */
+ 88, /* fnmar_d */
0, /* fnmai_d */
- 12, /* fnmsr_d */
+ 88, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 12, /* hmuli */
+ 4, /* hmulr_u */
+ 8, /* hmuli_u */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
@@ -534,14 +538,14 @@
#define JIT_INSTR_MAX 144
0, /* data */
0, /* live */
- 20, /* align */
+ 20, /* align */
0, /* save */
0, /* load */
4, /* skip */
2, /* #name */
0, /* #note */
0, /* label */
- 30, /* prolog */
+ 26, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
@@ -573,45 +577,45 @@
0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
- 28, /* va_arg_d */
+ 16, /* va_arg_d */
0, /* va_end */
4, /* addr */
- 12, /* addi */
+ 12, /* addi */
4, /* addcr */
8, /* addci */
4, /* addxr */
4, /* addxi */
4, /* subr */
- 12, /* subi */
+ 12, /* subi */
4, /* subcr */
8, /* subci */
4, /* subxr */
4, /* subxi */
- 16, /* rsbi */
- 8, /* mulr */
- 12, /* muli */
+ 16, /* rsbi */
+ 4, /* mulr */
+ 12, /* muli */
4, /* qmulr */
- 12, /* qmuli */
+ 12, /* qmuli */
4, /* qmulr_u */
8, /* qmuli_u */
- 32, /* divr */
- 36, /* divi */
- 24, /* divr_u */
- 28, /* divi_u */
- 18, /* qdivr */
- 22, /* qdivi */
- 18, /* qdivr_u */
- 22, /* qdivi_u */
- 24, /* remr */
- 32, /* remi */
- 24, /* remr_u */
- 28, /* remi_u */
+ 32, /* divr */
+ 36, /* divi */
+ 24, /* divr_u */
+ 28, /* divi_u */
+ 18, /* qdivr */
+ 22, /* qdivi */
+ 18, /* qdivr_u */
+ 22, /* qdivi_u */
+ 24, /* remr */
+ 32, /* remi */
+ 24, /* remr_u */
+ 28, /* remi_u */
4, /* andr */
- 12, /* andi */
+ 12, /* andi */
4, /* orr */
- 12, /* ori */
+ 12, /* ori */
4, /* xorr */
- 12, /* xori */
+ 12, /* xori */
4, /* lshr */
4, /* lshi */
4, /* rshr */
@@ -622,98 +626,98 @@
4, /* negi */
4, /* comr */
4, /* comi */
- 14, /* ltr */
- 14, /* lti */
- 14, /* ltr_u */
- 14, /* lti_u */
- 14, /* ler */
- 14, /* lei */
- 14, /* ler_u */
- 14, /* lei_u */
- 14, /* eqr */
- 14, /* eqi */
- 14, /* ger */
- 14, /* gei */
- 14, /* ger_u */
- 14, /* gei_u */
- 14, /* gtr */
- 14, /* gti */
- 14, /* gtr_u */
- 14, /* gti_u */
- 14, /* ner */
- 14, /* nei */
+ 14, /* ltr */
+ 14, /* lti */
+ 14, /* ltr_u */
+ 14, /* lti_u */
+ 14, /* ler */
+ 14, /* lei */
+ 14, /* ler_u */
+ 14, /* lei_u */
+ 14, /* eqr */
+ 14, /* eqi */
+ 14, /* ger */
+ 14, /* gei */
+ 14, /* ger_u */
+ 14, /* gei_u */
+ 14, /* gtr */
+ 14, /* gti */
+ 14, /* gtr_u */
+ 14, /* gti_u */
+ 14, /* ner */
+ 14, /* nei */
4, /* movr */
8, /* movi */
8, /* movnr */
8, /* movzr */
- 42, /* casr */
- 46, /* casi */
- 8, /* extr_c */
+ 42, /* casr */
+ 50, /* casi */
+ 4, /* extr_c */
4, /* exti_c */
4, /* extr_uc */
4, /* exti_uc */
- 8, /* extr_s */
+ 4, /* extr_s */
4, /* exti_s */
- 8, /* extr_us */
+ 4, /* extr_us */
4, /* exti_us */
0, /* extr_i */
0, /* exti_i */
0, /* extr_ui */
0, /* exti_ui */
- 20, /* bswapr_us */
+ 8, /* bswapr_us */
4, /* bswapi_us */
- 16, /* bswapr_ui */
+ 4, /* bswapr_ui */
8, /* bswapi_ui */
0, /* bswapr_ul */
0, /* bswapi_ul */
- 20, /* htonr_us */
+ 8, /* htonr_us */
4, /* htoni_us */
- 16, /* htonr_ui */
+ 4, /* htonr_ui */
8, /* htoni_ui */
0, /* htonr_ul */
0, /* htoni_ul */
4, /* ldr_c */
- 12, /* ldi_c */
+ 12, /* ldi_c */
4, /* ldr_uc */
- 12, /* ldi_uc */
+ 12, /* ldi_uc */
4, /* ldr_s */
- 12, /* ldi_s */
+ 12, /* ldi_s */
4, /* ldr_us */
- 12, /* ldi_us */
+ 12, /* ldi_us */
4, /* ldr_i */
- 12, /* ldi_i */
+ 12, /* ldi_i */
0, /* ldr_ui */
0, /* ldi_ui */
0, /* ldr_l */
0, /* ldi_l */
4, /* ldxr_c */
- 12, /* ldxi_c */
+ 12, /* ldxi_c */
4, /* ldxr_uc */
- 12, /* ldxi_uc */
+ 12, /* ldxi_uc */
4, /* ldxr_s */
- 12, /* ldxi_s */
+ 12, /* ldxi_s */
4, /* ldxr_us */
- 12, /* ldxi_us */
+ 12, /* ldxi_us */
4, /* ldxr_i */
- 12, /* ldxi_i */
+ 12, /* ldxi_i */
0, /* ldxr_ui */
0, /* ldxi_ui */
0, /* ldxr_l */
0, /* ldxi_l */
4, /* str_c */
- 12, /* sti_c */
+ 12, /* sti_c */
4, /* str_s */
- 12, /* sti_s */
+ 12, /* sti_s */
4, /* str_i */
- 12, /* sti_i */
+ 12, /* sti_i */
0, /* str_l */
0, /* sti_l */
4, /* stxr_c */
- 12, /* stxi_c */
+ 12, /* stxi_c */
4, /* stxr_s */
- 12, /* stxi_s */
+ 12, /* stxi_s */
4, /* stxr_i */
- 12, /* stxi_i */
+ 12, /* stxi_i */
0, /* stxr_l */
0, /* stxi_l */
8, /* bltr */
@@ -725,7 +729,7 @@
8, /* bler_u */
8, /* blei_u */
8, /* beqr */
- 16, /* beqi */
+ 16, /* beqi */
8, /* bger */
8, /* bgei */
8, /* bger_u */
@@ -735,7 +739,7 @@
8, /* bgtr_u */
8, /* bgti_u */
8, /* bner */
- 16, /* bnei */
+ 16, /* bnei */
8, /* bmsr */
8, /* bmsi */
8, /* bmcr */
@@ -756,10 +760,10 @@
8, /* bxsubi */
8, /* bxsubr_u */
8, /* bxsubi_u */
- 12, /* jmpr */
- 72, /* jmpi */
+ 4, /* jmpr */
+ 8, /* jmpi */
4, /* callr */
- 20, /* calli */
+ 20, /* calli */
0, /* prepare */
0, /* pushargr_c */
0, /* pushargi_c */
@@ -804,91 +808,91 @@
0, /* getarg_f */
0, /* putargr_f */
0, /* putargi_f */
- 24, /* addr_f */
- 24, /* addi_f */
- 24, /* subr_f */
- 24, /* subi_f */
- 24, /* rsbi_f */
- 24, /* mulr_f */
- 24, /* muli_f */
- 24, /* divr_f */
- 24, /* divi_f */
- 12, /* negr_f */
+ 4, /* addr_f */
+ 8, /* addi_f */
+ 4, /* subr_f */
+ 8, /* subi_f */
+ 8, /* rsbi_f */
+ 4, /* mulr_f */
+ 8, /* muli_f */
+ 4, /* divr_f */
+ 8, /* divi_f */
+ 4, /* negr_f */
0, /* negi_f */
- 12, /* absr_f */
+ 4, /* absr_f */
0, /* absi_f */
- 20, /* sqrtr_f */
+ 4, /* sqrtr_f */
0, /* sqrti_f */
- 24, /* ltr_f */
- 30, /* lti_f */
- 24, /* ler_f */
- 32, /* lei_f */
- 24, /* eqr_f */
- 30, /* eqi_f */
- 24, /* ger_f */
- 30, /* gei_f */
- 24, /* gtr_f */
- 30, /* gti_f */
- 28, /* ner_f */
- 32, /* nei_f */
- 56, /* unltr_f */
- 64, /* unlti_f */
- 56, /* unler_f */
- 64, /* unlei_f */
- 56, /* uneqr_f */
- 64, /* uneqi_f */
- 56, /* unger_f */
- 64, /* ungei_f */
- 56, /* ungtr_f */
- 64, /* ungti_f */
- 60, /* ltgtr_f */
- 68, /* ltgti_f */
- 28, /* ordr_f */
- 32, /* ordi_f */
- 56, /* unordr_f */
- 64, /* unordi_f */
- 20, /* truncr_f_i */
+ 18, /* ltr_f */
+ 30, /* lti_f */
+ 20, /* ler_f */
+ 32, /* lei_f */
+ 18, /* eqr_f */
+ 30, /* eqi_f */
+ 18, /* ger_f */
+ 30, /* gei_f */
+ 18, /* gtr_f */
+ 30, /* gti_f */
+ 18, /* ner_f */
+ 30, /* nei_f */
+ 18, /* unltr_f */
+ 30, /* unlti_f */
+ 18, /* unler_f */
+ 30, /* unlei_f */
+ 24, /* uneqr_f */
+ 36, /* uneqi_f */
+ 18, /* unger_f */
+ 30, /* ungei_f */
+ 18, /* ungtr_f */
+ 30, /* ungti_f */
+ 24, /* ltgtr_f */
+ 36, /* ltgti_f */
+ 18, /* ordr_f */
+ 30, /* ordi_f */
+ 18, /* unordr_f */
+ 30, /* unordi_f */
+ 8, /* truncr_f_i */
0, /* truncr_f_l */
- 28, /* extr_f */
- 22, /* extr_d_f */
- 8, /* movr_f */
- 16, /* movi_f */
- 8, /* ldr_f */
- 16, /* ldi_f */
+ 8, /* extr_f */
+ 4, /* extr_d_f */
+ 4, /* movr_f */
+ 12, /* movi_f */
+ 4, /* ldr_f */
+ 12, /* ldi_f */
8, /* ldxr_f */
- 16, /* ldxi_f */
- 8, /* str_f */
- 16, /* sti_f */
+ 16, /* ldxi_f */
+ 4, /* str_f */
+ 12, /* sti_f */
8, /* stxr_f */
- 16, /* stxi_f */
- 28, /* bltr_f */
- 32, /* blti_f */
- 28, /* bler_f */
- 32, /* blei_f */
- 28, /* beqr_f */
- 48, /* beqi_f */
- 28, /* bger_f */
- 32, /* bgei_f */
- 28, /* bgtr_f */
- 32, /* bgti_f */
- 28, /* bner_f */
- 32, /* bnei_f */
- 28, /* bunltr_f */
- 32, /* bunlti_f */
- 28, /* bunler_f */
- 32, /* bunlei_f */
- 60, /* buneqr_f */
- 68, /* buneqi_f */
- 28, /* bunger_f */
- 32, /* bungei_f */
- 28, /* bungtr_f */
- 32, /* bungti_f */
- 60, /* bltgtr_f */
- 68, /* bltgti_f */
- 28, /* bordr_f */
- 32, /* bordi_f */
- 28, /* bunordr_f */
- 32, /* bunordi_f */
+ 16, /* stxi_f */
+ 12, /* bltr_f */
+ 24, /* blti_f */
+ 12, /* bler_f */
+ 24, /* blei_f */
+ 12, /* beqr_f */
+ 24, /* beqi_f */
+ 12, /* bger_f */
+ 24, /* bgei_f */
+ 12, /* bgtr_f */
+ 24, /* bgti_f */
+ 12, /* bner_f */
+ 24, /* bnei_f */
+ 16, /* bunltr_f */
+ 28, /* bunlti_f */
+ 16, /* bunler_f */
+ 28, /* bunlei_f */
+ 20, /* buneqr_f */
+ 32, /* buneqi_f */
+ 16, /* bunger_f */
+ 28, /* bungei_f */
+ 12, /* bungtr_f */
+ 24, /* bungti_f */
+ 20, /* bltgtr_f */
+ 32, /* bltgti_f */
+ 12, /* bordr_f */
+ 24, /* bordi_f */
+ 12, /* bunordr_f */
+ 24, /* bunordi_f */
0, /* pushargr_f */
0, /* pushargi_f */
0, /* retr_f */
@@ -898,91 +902,91 @@
0, /* getarg_d */
0, /* putargr_d */
0, /* putargi_d */
- 34, /* addr_d */
- 36, /* addi_d */
- 34, /* subr_d */
- 36, /* subi_d */
- 36, /* rsbi_d */
- 34, /* mulr_d */
- 36, /* muli_d */
- 34, /* divr_d */
- 36, /* divi_d */
- 20, /* negr_d */
+ 4, /* addr_d */
+ 20, /* addi_d */
+ 4, /* subr_d */
+ 20, /* subi_d */
+ 20, /* rsbi_d */
+ 4, /* mulr_d */
+ 20, /* muli_d */
+ 4, /* divr_d */
+ 20, /* divi_d */
+ 4, /* negr_d */
0, /* negi_d */
- 20, /* absr_d */
+ 4, /* absr_d */
0, /* absi_d */
- 26, /* sqrtr_d */
+ 4, /* sqrtr_d */
0, /* sqrti_d */
- 28, /* ltr_d */
- 34, /* lti_d */
- 28, /* ler_d */
- 36, /* lei_d */
- 28, /* eqr_d */
- 34, /* eqi_d */
- 28, /* ger_d */
- 34, /* gei_d */
- 28, /* gtr_d */
- 34, /* gti_d */
- 32, /* ner_d */
- 36, /* nei_d */
- 66, /* unltr_d */
- 72, /* unlti_d */
- 66, /* unler_d */
- 72, /* unlei_d */
- 66, /* uneqr_d */
- 72, /* uneqi_d */
- 66, /* unger_d */
- 72, /* ungei_d */
- 66, /* ungtr_d */
- 72, /* ungti_d */
- 70, /* ltgtr_d */
- 76, /* ltgti_d */
- 32, /* ordr_d */
- 36, /* ordi_d */
- 66, /* unordr_d */
- 72, /* unordi_d */
- 20, /* truncr_d_i */
+ 18, /* ltr_d */
+ 34, /* lti_d */
+ 20, /* ler_d */
+ 36, /* lei_d */
+ 18, /* eqr_d */
+ 34, /* eqi_d */
+ 18, /* ger_d */
+ 34, /* gei_d */
+ 18, /* gtr_d */
+ 34, /* gti_d */
+ 18, /* ner_d */
+ 34, /* nei_d */
+ 18, /* unltr_d */
+ 34, /* unlti_d */
+ 18, /* unler_d */
+ 34, /* unlei_d */
+ 24, /* uneqr_d */
+ 40, /* uneqi_d */
+ 18, /* unger_d */
+ 34, /* ungei_d */
+ 18, /* ungtr_d */
+ 34, /* ungti_d */
+ 24, /* ltgtr_d */
+ 40, /* ltgti_d */
+ 18, /* ordr_d */
+ 34, /* ordi_d */
+ 18, /* unordr_d */
+ 34, /* unordi_d */
+ 8, /* truncr_d_i */
0, /* truncr_d_l */
- 36, /* extr_d */
- 22, /* extr_f_d */
- 16, /* movr_d */
- 32, /* movi_d */
- 16, /* ldr_d */
- 24, /* ldi_d */
- 20, /* ldxr_d */
- 28, /* ldxi_d */
- 16, /* str_d */
- 24, /* sti_d */
- 20, /* stxr_d */
- 28, /* stxi_d */
- 32, /* bltr_d */
- 36, /* blti_d */
- 32, /* bler_d */
- 36, /* blei_d */
- 32, /* beqr_d */
- 52, /* beqi_d */
- 32, /* bger_d */
- 36, /* bgei_d */
- 32, /* bgtr_d */
- 36, /* bgti_d */
- 32, /* bner_d */
- 36, /* bnei_d */
- 32, /* bunltr_d */
- 36, /* bunlti_d */
- 32, /* bunler_d */
- 36, /* bunlei_d */
- 68, /* buneqr_d */
- 76, /* buneqi_d */
- 32, /* bunger_d */
- 36, /* bungei_d */
- 32, /* bungtr_d */
- 36, /* bungti_d */
- 68, /* bltgtr_d */
- 76, /* bltgti_d */
- 32, /* bordr_d */
- 36, /* bordi_d */
- 32, /* bunordr_d */
- 36, /* bunordi_d */
+ 8, /* extr_d */
+ 4, /* extr_f_d */
+ 4, /* movr_d */
+ 32, /* movi_d */
+ 4, /* ldr_d */
+ 12, /* ldi_d */
+ 8, /* ldxr_d */
+ 16, /* ldxi_d */
+ 4, /* str_d */
+ 12, /* sti_d */
+ 8, /* stxr_d */
+ 16, /* stxi_d */
+ 12, /* bltr_d */
+ 28, /* blti_d */
+ 12, /* bler_d */
+ 28, /* blei_d */
+ 12, /* beqr_d */
+ 36, /* beqi_d */
+ 12, /* bger_d */
+ 28, /* bgei_d */
+ 12, /* bgtr_d */
+ 28, /* bgti_d */
+ 12, /* bner_d */
+ 28, /* bnei_d */
+ 16, /* bunltr_d */
+ 32, /* bunlti_d */
+ 16, /* bunler_d */
+ 32, /* bunlei_d */
+ 20, /* buneqr_d */
+ 36, /* buneqi_d */
+ 16, /* bunger_d */
+ 32, /* bungei_d */
+ 12, /* bungtr_d */
+ 28, /* bungti_d */
+ 20, /* bltgtr_d */
+ 36, /* bltgti_d */
+ 12, /* bordr_d */
+ 28, /* bordi_d */
+ 12, /* bunordr_d */
+ 28, /* bunordi_d */
0, /* pushargr_d */
0, /* pushargi_d */
0, /* retr_d */
@@ -990,71 +994,75 @@
0, /* retval_d */
4, /* movr_w_f */
8, /* movi_w_f */
- 8, /* movr_ww_d */
- 20, /* movi_ww_d */
+ 4, /* movr_ww_d */
+ 16, /* movi_ww_d */
0, /* movr_w_d */
0, /* movi_w_d */
4, /* movr_f_w */
- 8, /* movi_f_w */
- 8, /* movr_d_ww */
- 12, /* movi_d_ww */
+ 4, /* movi_f_w */
+ 4, /* movr_d_ww */
+ 12, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
8, /* clor */
4, /* cloi */
4, /* clzr */
4, /* clzi */
- 12, /* ctor */
+ 12, /* ctor */
4, /* ctoi */
8, /* ctzr */
4, /* ctzi */
4, /* rbitr */
8, /* rbiti */
- 40, /* popcntr */
+ 40, /* popcntr */
4, /* popcnti */
- 12, /* lrotr */
+ 12, /* lrotr */
4, /* lroti */
4, /* rrotr */
4, /* rroti */
- 8, /* extr */
+ 4, /* extr */
4, /* exti */
- 12, /* extr_u */
+ 4, /* extr_u */
4, /* exti_u */
- 24, /* depr */
- 20, /* depi */
- 50, /* qlshr */
+ 4, /* depr */
+ 8, /* depi */
+ 50, /* qlshr */
8, /* qlshi */
- 50, /* qlshr_u */
+ 50, /* qlshr_u */
8, /* qlshi_u */
- 50, /* qrshr */
+ 50, /* qrshr */
8, /* qrshi */
- 50, /* qrshr_u */
+ 50, /* qrshr_u */
8, /* qrshi_u */
- 72, /* unldr */
- 44, /* unldi */
- 72, /* unldr_u */
- 44, /* unldi_u */
- 68, /* unstr */
- 44, /* unsti */
+ 72, /* unldr */
+ 44, /* unldi */
+ 72, /* unldr_u */
+ 44, /* unldi_u */
+ 68, /* unstr */
+ 44, /* unsti */
140, /* unldr_x */
- 76, /* unldi_x */
+ 76, /* unldi_x */
144, /* unstr_x */
- 96, /* unsti_x */
- 48, /* fmar_f */
+ 92, /* unsti_x */
+ 8, /* fmar_f */
0, /* fmai_f */
- 48, /* fmsr_f */
+ 8, /* fmsr_f */
0, /* fmsi_f */
- 68, /* fmar_d */
+ 8, /* fmar_d */
0, /* fmai_d */
- 68, /* fmsr_d */
+ 8, /* fmsr_d */
0, /* fmsi_d */
- 60, /* fnmar_f */
+ 12, /* fnmar_f */
0, /* fnmai_f */
- 60, /* fnmsr_f */
+ 12, /* fnmsr_f */
0, /* fnmsi_f */
- 88, /* fnmar_d */
+ 12, /* fnmar_d */
0, /* fnmai_d */
- 88, /* fnmsr_d */
+ 12, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 12, /* hmuli */
+ 4, /* hmulr_u */
+ 8, /* hmuli_u */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index df6c0e7..25aa7cb 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -1573,6 +1573,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c
index bb6e540..4db79d8 100644
--- a/lib/jit_hppa-cpu.c
+++ b/lib/jit_hppa-cpu.c
@@ -707,6 +707,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2)
+#define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0)
+#define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2)
+#define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0)
static long long __llmul(int, int);
#define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,
@@ -1939,7 +1943,8 @@ _qmulr(jit_state_t *_jit,
movr(_R26_REGNO, r2);
movr(_R25_REGNO, r3);
calli((jit_word_t)__llmul);
- movr(r0, _R29_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _R29_REGNO);
movr(r1, _R28_REGNO);
}
@@ -1950,7 +1955,8 @@ _qmuli(jit_state_t *_jit,
movr(_R26_REGNO, r2);
movi(_R25_REGNO, i0);
calli((jit_word_t)__llmul);
- movr(r0, _R29_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _R29_REGNO);
movr(r1, _R28_REGNO);
}
@@ -1967,7 +1973,8 @@ _qmulr_u(jit_state_t *_jit,
ldxi_f(rn(t1), _FP_REGNO, alloca_offset - 8);
XMPYU(rn(t0), rn(t1), rn(t0));
stxi_d(alloca_offset - 8, _FP_REGNO, rn(t0));
- ldxi(r0, _FP_REGNO, alloca_offset - 4);
+ if (r0 != JIT_NOREG)
+ ldxi(r0, _FP_REGNO, alloca_offset - 4);
ldxi(r1, _FP_REGNO, alloca_offset - 8);
jit_unget_reg(t1);
jit_unget_reg(t0);
diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c
index c460264..7a23ebd 100644
--- a/lib/jit_hppa-sz.c
+++ b/lib/jit_hppa-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
12, /* fnmsr_d */
0, /* fnmsi_d */
+ 36, /* hmulr */
+ 40, /* hmuli */
+ 48, /* hmulr_u */
+ 56, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c
index b6b1f59..6330bf6 100644
--- a/lib/jit_hppa.c
+++ b/lib/jit_hppa.c
@@ -1024,6 +1024,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
index 4460940..a4ec58d 100644
--- a/lib/jit_ia64-cpu.c
+++ b/lib/jit_ia64-cpu.c
@@ -1206,6 +1206,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define hmulr(r0,r1,r2) mulh(r0,r1,r2,1)
+#define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define hmulr_u(r0,r1,r2) mulh(r0,r1,r2,0)
+#define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
#define divr(r0,r1,r2) _divr(_jit,r0,r1,r2)
static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define divi(r0,r1,i0) _divi(_jit,r0,r1,i0)
@@ -3890,6 +3896,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
jit_unget_reg(reg);
}
+static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr_u(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
#if !defined(__GNUC__)
static long
__divdi3(long u, long v)
diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c
index 2a8feaf..9033334 100644
--- a/lib/jit_ia64-sz.c
+++ b/lib/jit_ia64-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
16, /* fnmsr_d */
0, /* fnmsi_d */
+ 32, /* hmulr */
+ 32, /* hmuli */
+ 32, /* hmulr_u */
+ 32, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c
index d385e8d..f689231 100644
--- a/lib/jit_ia64.c
+++ b/lib/jit_ia64.c
@@ -1171,6 +1171,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(subc,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_loongarch-cpu.c b/lib/jit_loongarch-cpu.c
index 46e8ce7..22ca0f0 100644
--- a/lib/jit_loongarch-cpu.c
+++ b/lib/jit_loongarch-cpu.c
@@ -386,6 +386,12 @@ static void _rsbi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define mulr(r0, r1, r2) MUL_D(r0, r1, r2)
# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr(r0, r1, r2) MULH_D(r0, r1, r2)
+# define hmuli(r0, r1, i0) _hmuli(_jit, r0, r1, i0)
+static void _hmuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr_u(r0, r1, r2) MULH_DU(r0, r1, r2)
+# define hmuli_u(r0, r1, i0) _hmuli_u(_jit, r0, r1, i0)
+static void _hmuli_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define qmulr(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 1)
# define qmulr_u(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 0)
# define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
@@ -1244,6 +1250,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr_u(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
diff --git a/lib/jit_loongarch-sz.c b/lib/jit_loongarch-sz.c
index 377dbc0..18e73aa 100644
--- a/lib/jit_loongarch-sz.c
+++ b/lib/jit_loongarch-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 20, /* hmuli */
+ 4, /* hmulr_u */
+ 20, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_loongarch.c b/lib/jit_loongarch.c
index b892563..cd38c4e 100644
--- a/lib/jit_loongarch.c
+++ b/lib/jit_loongarch.c
@@ -1098,6 +1098,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 7a3b600..37031c4 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -588,6 +588,14 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0,r1,r2) _hmulr(_jit,r0,r1,r2)
+static void _hmulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr_u(r0,r1,r2) _hmulr_u(_jit,r0,r1,r2)
+static void _hmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1)
# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0)
# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc)
@@ -2085,6 +2093,48 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ if (jit_mips6_p())
+ muh_r6(r0, r1, r2);
+ else {
+ mult(r1, r2);
+ MFHI(r0);
+ }
+}
+
+static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
+_hmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ if (jit_mips6_p())
+ muhu_r6(r0, r1, r2);
+ else {
+ multu(r1, r2);
+ MFHI(r0);
+ }
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr_u(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c
index 8c5cc52..156fc95 100644
--- a/lib/jit_mips-sz.c
+++ b/lib/jit_mips-sz.c
@@ -525,6 +525,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 8, /* hmulr */
+ 16, /* hmuli */
+ 8, /* hmulr_u */
+ 16, /* hmuli_u */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
@@ -1053,4 +1057,8 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 8, /* hmulr */
+ 28, /* hmuli */
+ 8, /* hmulr_u */
+ 28, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 50cccf1..1fec109 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1551,6 +1551,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_names.c b/lib/jit_names.c
index 52f1eef..88bc717 100644
--- a/lib/jit_names.c
+++ b/lib/jit_names.c
@@ -289,4 +289,6 @@ static char *code_name[] = {
"fnmsr_f", "fnmsi_f",
"fnmar_d", "fnmai_d",
"fnmsr_d", "fnmsi_d",
+ "hmulr", "hmuli",
+ "hmulr_u", "hmuli_u",
};
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
index 5bd1aa3..f84998b 100644
--- a/lib/jit_ppc-cpu.c
+++ b/lib/jit_ppc-cpu.c
@@ -600,16 +600,20 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# if __WORDSIZE == 32
# define mulr(r0,r1,r2) MULLW(r0,r1,r2)
# define mullr(r0,r1,r2) MULLW(r0,r1,r2)
-# define mulhr(r0,r1,r2) MULHW(r0,r1,r2)
-# define mulhr_u(r0,r1,r2) MULHWU(r0,r1,r2)
+# define hmulr(r0,r1,r2) MULHW(r0,r1,r2)
+# define hmulr_u(r0,r1,r2) MULHWU(r0,r1,r2)
# else
# define mulr(r0,r1,r2) MULLD(r0,r1,r2)
# define mullr(r0,r1,r2) MULLD(r0,r1,r2)
-# define mulhr(r0,r1,r2) MULHD(r0,r1,r2)
-# define mulhr_u(r0,r1,r2) MULHDU(r0,r1,r2)
+# define hmulr(r0,r1,r2) MULHD(r0,r1,r2)
+# define hmulr_u(r0,r1,r2) MULHDU(r0,r1,r2)
# endif
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1)
# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0)
# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc)
@@ -1535,6 +1539,9 @@ static void
_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
+ /* NOTE verified and overflow is correctly computed.
+ * No need to check for __WORDSIZE == 32.
+ * Documented as a 32 bit instruction. */
if (can_sign_extend_short_p(i0))
MULLI(r0, r1, i0);
else {
@@ -1546,6 +1553,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr_u(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
@@ -1557,9 +1584,9 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
else
mullr(r0, r2, r3);
if (sign)
- mulhr(r1, r2, r3);
+ hmulr(r1, r2, r3);
else
- mulhr_u(r1, r2, r3);
+ hmulr_u(r1, r2, r3);
if (r0 == r2 || r0 == r3) {
movr(r0, rn(reg));
jit_unget_reg(reg);
diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c
index 2015464..136f1d4 100644
--- a/lib/jit_ppc-sz.c
+++ b/lib/jit_ppc-sz.c
@@ -527,6 +527,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 12, /* hmuli */
+ 4, /* hmulr_u */
+ 12, /* hmuli_u */
#endif /* !_CALL_SYSV */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
@@ -1061,6 +1065,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 12, /* hmuli */
+ 4, /* hmulr_u */
+ 12, /* hmuli_u */
#endif /* _CALL_SYSV */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
@@ -1594,6 +1602,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 24, /* hmuli */
+ 4, /* hmulr_u */
+ 24, /* hmuli_u */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
@@ -2126,6 +2138,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 24, /* hmuli */
+ 4, /* hmulr_u */
+ 24, /* hmuli_u */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
index bddd523..9f98176 100644
--- a/lib/jit_ppc.c
+++ b/lib/jit_ppc.c
@@ -1377,6 +1377,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_riscv-cpu.c b/lib/jit_riscv-cpu.c
index 04174ae..8d6115a 100644
--- a/lib/jit_riscv-cpu.c
+++ b/lib/jit_riscv-cpu.c
@@ -302,6 +302,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define mulr(r0, r1, r2) MUL(r0, r1, r2)
# define muli(r0, r1, im) _muli(_jit, r0, r1, im)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0, r1, r2) MULH(r0, r1, r2)
+# define hmuli(r0, r1, im) _hmuli(_jit, r0, r1, im)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr_u(r0, r1, r2) MULHU(r0, r1, r2)
+# define hmuli_u(r0, r1, im) _hmuli_u(_jit, r0, r1, im)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define divr(r0, r1, r2) DIV(r0, r1, r2)
# define divi(r0, r1, im) _divi(_jit, r0, r1, im)
static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -939,6 +945,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_gpr);
+ movi(rn(t0), i0);
+ hmulr(r0, r1, rn(t0));
+ jit_unget_reg(t0);
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t t0;
+ t0 = jit_get_reg(jit_class_gpr);
+ movi(rn(t0), i0);
+ hmulr_u(r0, r1, rn(t0));
+ jit_unget_reg(t0);
+}
+
+static void
_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t t0;
diff --git a/lib/jit_riscv-sz.c b/lib/jit_riscv-sz.c
index 887f8dc..c08e5bd 100644
--- a/lib/jit_riscv-sz.c
+++ b/lib/jit_riscv-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 16, /* hmuli */
+ 4, /* hmulr_u */
+ 16, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c
index 2a399a9..27b0c5a 100644
--- a/lib/jit_riscv.c
+++ b/lib/jit_riscv.c
@@ -1149,6 +1149,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c
index 3fe3e07..23f7346 100644
--- a/lib/jit_s390-cpu.c
+++ b/lib/jit_s390-cpu.c
@@ -1023,6 +1023,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2)
+# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0)
+# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2)
+# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0)
# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
@@ -2765,14 +2769,14 @@ _qmulr(jit_state_t *_jit,
jit_int32_t reg;
/* The only invalid condition is r0 == r1 */
jit_int32_t t2, t3, s2, s3;
- if (r2 == r0 || r2 == r1) {
+ if ((r0 != JIT_NOREG && r2 == r0) || r2 == r1) {
s2 = jit_get_reg(jit_class_gpr);
t2 = rn(s2);
movr(t2, r2);
}
else
t2 = r2;
- if (r3 == r0 || r3 == r1) {
+ if ((r0 != JIT_NOREG && r3 == r0) || r3 == r1) {
s3 = jit_get_reg(jit_class_gpr);
t3 = rn(s3);
movr(t3, r3);
@@ -2815,7 +2819,8 @@ _qmulr_u(jit_state_t *_jit,
regno = jit_get_reg_pair();
movr(rn(regno) + 1, r2);
MULU_(rn(regno), r3);
- movr(r0, rn(regno) + 1);
+ if (r0 != JIT_NOREG)
+ movr(r0, rn(regno) + 1);
movr(r1, rn(regno));
jit_unget_reg_pair(regno);
}
@@ -2829,7 +2834,8 @@ _qmuli_u(jit_state_t *_jit,
movr(rn(regno) + 1, r2);
movi(rn(regno), i0);
MULU_(rn(regno), rn(regno));
- movr(r0, rn(regno) + 1);
+ if (r0 != JIT_NOREG)
+ movr(r0, rn(regno) + 1);
movr(r1, rn(regno));
jit_unget_reg_pair(regno);
}
diff --git a/lib/jit_s390-sz.c b/lib/jit_s390-sz.c
index c71b040..b8a87e8 100644
--- a/lib/jit_s390-sz.c
+++ b/lib/jit_s390-sz.c
@@ -524,6 +524,10 @@
0, /* fnmai_d */
10, /* fnmsr_d */
0, /* fnmsi_d */
+ 34, /* hmulr */
+ 42, /* hmuli */
+ 8, /* hmulr_u */
+ 16, /* hmuli_u */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
@@ -1052,4 +1056,8 @@
0, /* fnmai_d */
10, /* fnmsr_d */
0, /* fnmsi_d */
+ 44, /* hmulr */
+ 60, /* hmuli */
+ 12, /* hmulr_u */
+ 28, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_s390.c b/lib/jit_s390.c
index 858ea30..851d0d0 100644
--- a/lib/jit_s390.c
+++ b/lib/jit_s390.c
@@ -1115,6 +1115,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c
index 3a86f9e..6562867 100644
--- a/lib/jit_sparc-cpu.c
+++ b/lib/jit_sparc-cpu.c
@@ -630,6 +630,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# endif
# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2)
+# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0)
+# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2)
+# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0)
# if __WORDSIZE == 32
# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1)
# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0)
@@ -1633,6 +1637,8 @@ static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
+ if (r0 == JIT_NOREG)
+ r0 = r1;
if (sign)
SMUL(r2, r3, r0);
else
@@ -1646,6 +1652,8 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
{
jit_int32_t reg;
if (s13_p(i0)) {
+ if (r0 == JIT_NOREG)
+ r0 = r1;
if (sign)
SMULI(r2, i0, r0);
else
@@ -1698,7 +1706,8 @@ _qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
movr(_O0_REGNO, r3);
movr(_O1_REGNO, r2);
calli((jit_word_t)__llmul);
- movr(r0, _O1_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _O1_REGNO);
movr(r1, _O0_REGNO);
QMUL_EPILOG();
}
@@ -1711,7 +1720,8 @@ _qmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
movi(_O0_REGNO, i0);
movr(_O1_REGNO, r2);
calli((jit_word_t)__llmul);
- movr(r0, _O1_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _O1_REGNO);
movr(r1, _O0_REGNO);
QMUL_EPILOG();
}
@@ -1729,7 +1739,8 @@ _qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
movr(_O0_REGNO, r3);
movr(_O1_REGNO, r2);
calli((jit_word_t)__ullmul);
- movr(r0, _O1_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _O1_REGNO);
movr(r1, _O0_REGNO);
QMUL_EPILOG();
}
@@ -1742,7 +1753,8 @@ _qmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
movi(_O0_REGNO, i0);
movr(_O1_REGNO, r2);
calli((jit_word_t)__ullmul);
- movr(r0, _O1_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _O1_REGNO);
movr(r1, _O0_REGNO);
QMUL_EPILOG();
}
diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c
index a8aeb1c..8a4ce7b 100644
--- a/lib/jit_sparc-sz.c
+++ b/lib/jit_sparc-sz.c
@@ -524,6 +524,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 8, /* hmulr */
+ 16, /* hmuli */
+ 8, /* hmulr_u */
+ 16, /* hmuli_u */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
@@ -1052,4 +1056,8 @@
0, /* fnmai_d */
20, /* fnmsr_d */
0, /* fnmsi_d */
+ 44, /* hmulr */
+ 60, /* hmuli */
+ 44, /* hmulr_u */
+ 60, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c
index 1acf636..bd8756d 100644
--- a/lib/jit_sparc.c
+++ b/lib/jit_sparc.c
@@ -1409,6 +1409,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index 47529cf..44f5b45 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -256,6 +256,10 @@ static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 1)
+# define hmulr_u(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 0)
+# define hmuli(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 1)
+# define hmuli_u(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 0)
# define umulr(r0) unr(X86_IMUL, r0)
# define umulr_u(r0) unr(X86_MUL, r0)
# define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1)
@@ -1525,14 +1529,20 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
else
umulr_u(mul);
- if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
- xchgr(_RAX_REGNO, _RDX_REGNO);
+ if (r0 != JIT_NOREG) {
+ if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
+ xchgr(_RAX_REGNO, _RDX_REGNO);
+ else {
+ if (r0 != _RDX_REGNO)
+ movr(r0, _RAX_REGNO);
+ movr(r1, _RDX_REGNO);
+ if (r0 == _RDX_REGNO)
+ movr(r0, _RAX_REGNO);
+ }
+ }
else {
- if (r0 != _RDX_REGNO)
- movr(r0, _RAX_REGNO);
+ assert(r1 != JIT_NOREG);
movr(r1, _RDX_REGNO);
- if (r0 == _RDX_REGNO)
- movr(r0, _RAX_REGNO);
}
clear(_RDX_REGNO, _RDX);
diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c
index a2b608c..99bb625 100644
--- a/lib/jit_x86-sz.c
+++ b/lib/jit_x86-sz.c
@@ -525,6 +525,10 @@
0, /* fnmai_d */
27, /* fnmsr_d */
0, /* fnmsi_d */
+ 18, /* hmulr */
+ 23, /* hmuli */
+ 18, /* hmulr_u */
+ 23, /* hmuli_u */
#endif /* __X32 */
#if __X64
@@ -1054,6 +1058,10 @@
0, /* fnmai_d */
30, /* fnmsr_d */
0, /* fnmsi_d */
+ 17, /* hmulr */
+ 27, /* hmuli */
+ 17, /* hmulr_u */
+ 27, /* hmuli_u */
#else
# if __X64_32
@@ -1582,6 +1590,10 @@
0, /* fnmai_d */
31, /* fnmsr_d */
0, /* fnmsi_d */
+ 15, /* hmulr */
+ 21, /* hmuli */
+ 15, /* hmulr_u */
+ 21, /* hmuli_u */
#else
#define JIT_INSTR_MAX 112
@@ -2109,6 +2121,10 @@
0, /* fnmai_d */
31, /* fnmsr_d */
0, /* fnmsi_d */
+ 17, /* hmulr */
+ 27, /* hmuli */
+ 17, /* hmulr_u */
+ 27, /* hmuli_u */
#endif /* __CYGWIN__ || _WIN32 */
# endif /* __X64_32 */
#endif /* __X64 */
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index fb5f3ca..dd4fccd 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -1798,6 +1798,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/lightning.c b/lib/lightning.c
index d1d8ffc..25a6078 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -1551,7 +1551,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
case jit_code_addi: case jit_code_addxi: case jit_code_addci:
case jit_code_subi: case jit_code_subxi: case jit_code_subci:
case jit_code_rsbi:
- case jit_code_muli: case jit_code_divi: case jit_code_divi_u:
+ case jit_code_muli: case jit_code_hmuli: case jit_code_hmuli_u:
+ case jit_code_divi: case jit_code_divi_u:
case jit_code_remi: case jit_code_remi_u: case jit_code_andi:
case jit_code_ori: case jit_code_xori: case jit_code_lshi:
case jit_code_rshi: case jit_code_rshi_u: case jit_code_lroti:
@@ -1592,7 +1593,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
break;
case jit_code_addr: case jit_code_addxr: case jit_code_addcr:
case jit_code_subr: case jit_code_subxr: case jit_code_subcr:
- case jit_code_mulr: case jit_code_divr: case jit_code_divr_u:
+ case jit_code_mulr: case jit_code_hmulr: case jit_code_hmulr_u:
+ case jit_code_divr: case jit_code_divr_u:
case jit_code_remr: case jit_code_remr_u: case jit_code_andr:
case jit_code_orr: case jit_code_xorr: case jit_code_lshr:
case jit_code_rshr: case jit_code_rshr_u: case jit_code_lrotr:
generated by cgit v1.2.3 (git 2.25.1) at 2025-09-17 06:41:23 +0000

AltStyle によって変換されたページ (->オリジナル) /