Add back the jit_hmul interfaces - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author: pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-08-21 19:45:10 -0300
committer: pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-08-21 19:45:10 -0300
commit: 512f9c3ccd3ec43d95fb235040451558b816cfff (patch)
tree: ac9afd08546379908922b86fff6639e85f49e19d /lib
parent: 3d72aba731677c1c262692a584bd41b6b12c792e (diff)
download: lightning-512f9c3ccd3ec43d95fb235040451558b816cfff.tar.gz
Add back the jit_hmul interfaces
jit_hmul existed in Lightning 1.x, but at first jit_qmul appeared to cover all of its uses, since hmul was used to compute the high bits of a complete multiplication. It turns out there are other cases where only the top bits are really required; one example is division by constants. An optimized version, which attempts to reduce the number of instructions used where applicable, has now been added.

* check/Makefile.am, check/lightning.c: Add new hmul tests.
* doc/body.texi: Document hmul.
* include/lightning.h.in: Create the new hmul codes.
* lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, lib/jit_loongarch-cpu.c, lib/jit_loongarch-sz.c, lib/jit_loongarch.c, lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c, lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_riscv-cpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c, lib/jit_s390-cpu.c, lib/jit_s390-sz.c, lib/jit_s390.c, lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c: Implement hmul and update the *-sz.c files.
* lib/jit_names.c, lib/lightning.c: Add knowledge of hmul.
Diffstat (limited to 'lib')
-rw-r--r-- lib/jit_aarch64-cpu.c 26
-rw-r--r-- lib/jit_aarch64-sz.c 9
-rw-r--r-- lib/jit_aarch64.c 4
-rw-r--r-- lib/jit_alpha-cpu.c 48
-rw-r--r-- lib/jit_alpha-sz.c 4
-rw-r--r-- lib/jit_alpha.c 4
-rw-r--r-- lib/jit_arm-cpu.c 33
-rw-r--r-- lib/jit_arm-sz.c 1078
-rw-r--r-- lib/jit_arm.c 4
-rw-r--r-- lib/jit_hppa-cpu.c 13
-rw-r--r-- lib/jit_hppa-sz.c 4
-rw-r--r-- lib/jit_hppa.c 4
-rw-r--r-- lib/jit_ia64-cpu.c 26
-rw-r--r-- lib/jit_ia64-sz.c 4
-rw-r--r-- lib/jit_ia64.c 4
-rw-r--r-- lib/jit_loongarch-cpu.c 26
-rw-r--r-- lib/jit_loongarch-sz.c 4
-rw-r--r-- lib/jit_loongarch.c 4
-rw-r--r-- lib/jit_mips-cpu.c 50
-rw-r--r-- lib/jit_mips-sz.c 8
-rw-r--r-- lib/jit_mips.c 4
-rw-r--r-- lib/jit_names.c 2
-rw-r--r-- lib/jit_ppc-cpu.c 39
-rw-r--r-- lib/jit_ppc-sz.c 16
-rw-r--r-- lib/jit_ppc.c 4
-rw-r--r-- lib/jit_riscv-cpu.c 26
-rw-r--r-- lib/jit_riscv-sz.c 4
-rw-r--r-- lib/jit_riscv.c 4
-rw-r--r-- lib/jit_s390-cpu.c 14
-rw-r--r-- lib/jit_s390-sz.c 8
-rw-r--r-- lib/jit_s390.c 4
-rw-r--r-- lib/jit_sparc-cpu.c 20
-rw-r--r-- lib/jit_sparc-sz.c 8
-rw-r--r-- lib/jit_sparc.c 4
-rw-r--r-- lib/jit_x86-cpu.c 22
-rw-r--r-- lib/jit_x86-sz.c 16
-rw-r--r-- lib/jit_x86.c 4
-rw-r--r-- lib/lightning.c 6
38 files changed, 984 insertions, 578 deletions
diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c
index d414cec..76a988b 100644
--- a/lib/jit_aarch64-cpu.c
+++ b/lib/jit_aarch64-cpu.c
@@ -555,6 +555,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define mulr(r0,r1,r2) MUL(r0,r1,r2)
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0,r1,r2) SMULH(r0,r1,r2)
+# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr_u(r0,r1,r2) UMULH(r0,r1,r2)
+# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
@@ -1229,6 +1235,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ hmulr_u(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+}
+
+static void
_qmulr(jit_state_t *_jit, jit_int32_t r0,
jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c
index 0a7bc2d..435bbe9 100644
--- a/lib/jit_aarch64-sz.c
+++ b/lib/jit_aarch64-sz.c
@@ -526,7 +526,12 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 16, /* hmuli */
+ 4, /* hmulr_u */
+ 16, /* hmuli_u */
# else /* PACKED_STACK */
+
#define JIT_INSTR_MAX 120
0, /* data */
0, /* live */
@@ -1052,5 +1057,9 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 16, /* hmuli */
+ 4, /* hmulr_u */
+ 16, /* hmuli_u */
# endif
#endif /* __WORDSIZE */
diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c
index cd4e79c..bc78800 100644
--- a/lib/jit_aarch64.c
+++ b/lib/jit_aarch64.c
@@ -1426,6 +1426,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c
index 7791063..b977214 100644
--- a/lib/jit_alpha-cpu.c
+++ b/lib/jit_alpha-cpu.c
@@ -362,6 +362,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t);
# define mulr(r0,r1,r2) MULQ(r1,r2,r0)
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2)
+# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0)
+# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2)
+# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0)
# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
@@ -1082,14 +1086,14 @@ _qmulr(jit_state_t *_jit, jit_int32_t r0,
jit_int32_t reg;
/* The only invalid condition is r0 == r1 */
jit_int32_t t2, t3, s2, s3;
- if (r2 == r0 || r2 == r1) {
+ if ((r0 != JIT_NOREG && r2 == r0) || r2 == r1) {
s2 = jit_get_reg(jit_class_gpr);
t2 = rn(s2);
movr(t2, r2);
}
else
t2 = r2;
- if (r3 == r0 || r3 == r1) {
+ if ((r0 != JIT_NOREG && r3 == r0) || r3 == r1) {
s3 = jit_get_reg(jit_class_gpr);
t3 = rn(s3);
movr(t3, r3);
@@ -1129,16 +1133,20 @@ _qmulr_u(jit_state_t *_jit, jit_int32_t r0,
jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
{
jit_int32_t reg;
- if (r0 == r2 || r0 == r3) {
- reg = jit_get_reg(jit_class_gpr);
- mulr(rn(reg), r2, r3);
+ if (r0 != JIT_NOREG) {
+ if (r0 == r2 || r0 == r3) {
+ reg = jit_get_reg(jit_class_gpr);
+ mulr(rn(reg), r2, r3);
+ }
+ else
+ mulr(r0, r2, r3);
}
- else
- mulr(r0, r2, r3);
UMULH(r2, r3, r1);
- if (r0 == r2 || r0 == r3) {
- movr(r0, rn(reg));
- jit_unget_reg(reg);
+ if (r0 != JIT_NOREG) {
+ if (r0 == r2 || r0 == r3) {
+ movr(r0, rn(reg));
+ jit_unget_reg(reg);
+ }
}
}
@@ -1148,16 +1156,20 @@ _qmuli_u(jit_state_t *_jit, jit_int32_t r0,
{
jit_int32_t reg;
if (_u8_p(i0)) {
- if (r0 == r2) {
- reg = jit_get_reg(jit_class_gpr);
- muli(rn(reg), r2, i0);
+ if (r0 != JIT_NOREG) {
+ if (r0 == r2) {
+ reg = jit_get_reg(jit_class_gpr);
+ muli(rn(reg), r2, i0);
+ }
+ else
+ muli(r0, r2, i0);
}
- else
- muli(r0, r2, i0);
UMULHi(r2, i0, r1);
- if (r0 == r2) {
- movr(r0, rn(reg));
- jit_unget_reg(reg);
+ if (r0 != JIT_NOREG) {
+ if (r0 == r2) {
+ movr(r0, rn(reg));
+ jit_unget_reg(reg);
+ }
}
}
else {
diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c
index b6ea741..fd39c0d 100644
--- a/lib/jit_alpha-sz.c
+++ b/lib/jit_alpha-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
20, /* fnmsr_d */
0, /* fnmsi_d */
+ 36, /* hmulr */
+ 60, /* hmuli */
+ 4, /* hmulr_u */
+ 28, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c
index d2d378f..69bf397 100644
--- a/lib/jit_alpha.c
+++ b/lib/jit_alpha.c
@@ -1019,6 +1019,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index 9e19434..149db9a 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -984,6 +984,16 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0,r1,r2) ihmulr(r0,r1,r2,1)
+# define hmulr_u(r0,r1,r2) ihmulr(r0,r1,r2,0)
+# define ihmulr(r0,r1,r2,cc) _ihmulr(_jit,r0,r1,r2,cc)
+static void _ihmulr(jit_state_t*,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_bool_t);
+# define hmuli(r0,r1,i0) ihmuli(r0,r1,i0,1)
+# define hmuli_u(r0,r1,i0) ihmuli(r0,r1,i0,0)
+# define ihmuli(r0,r1,i0,cc) _ihmuli(_jit,r0,r1,i0,cc)
+static void _ihmuli(jit_state_t*,jit_int32_t,jit_int32_t,
+ jit_word_t,jit_bool_t);
# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1)
# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0)
# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc)
@@ -2322,6 +2332,29 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_ihmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_bool_t sign)
+{
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ iqmulr(rn(reg), r0, r1, r2, sign);
+ jit_unget_reg(reg);
+}
+
+static void
+_ihmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_word_t i0, jit_bool_t sign)
+{
+ jit_int32_t t0, t1;
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ movi(rn(t1), i0);
+ iqmulr(rn(t0), r0, r1, rn(t1), sign);
+ jit_unget_reg(t1);
+ jit_unget_reg(t0);
+}
+
+static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c
index 31931e4..7ec5e9e 100644
--- a/lib/jit_arm-sz.c
+++ b/lib/jit_arm-sz.c
@@ -4,14 +4,14 @@
#define JIT_INSTR_MAX 144
0, /* data */
0, /* live */
- 20, /* align */
+ 20, /* align */
0, /* save */
0, /* load */
4, /* skip */
2, /* #name */
0, /* #note */
0, /* label */
- 26, /* prolog */
+ 30, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
@@ -43,45 +43,45 @@
0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
- 16, /* va_arg_d */
+ 28, /* va_arg_d */
0, /* va_end */
4, /* addr */
- 12, /* addi */
+ 12, /* addi */
4, /* addcr */
8, /* addci */
4, /* addxr */
4, /* addxi */
4, /* subr */
- 12, /* subi */
+ 12, /* subi */
4, /* subcr */
8, /* subci */
4, /* subxr */
4, /* subxi */
- 16, /* rsbi */
- 4, /* mulr */
- 12, /* muli */
+ 16, /* rsbi */
+ 8, /* mulr */
+ 12, /* muli */
4, /* qmulr */
- 12, /* qmuli */
+ 12, /* qmuli */
4, /* qmulr_u */
8, /* qmuli_u */
- 32, /* divr */
- 36, /* divi */
- 24, /* divr_u */
- 28, /* divi_u */
- 18, /* qdivr */
- 22, /* qdivi */
- 18, /* qdivr_u */
- 22, /* qdivi_u */
- 24, /* remr */
- 32, /* remi */
- 24, /* remr_u */
- 28, /* remi_u */
+ 32, /* divr */
+ 36, /* divi */
+ 24, /* divr_u */
+ 28, /* divi_u */
+ 18, /* qdivr */
+ 22, /* qdivi */
+ 18, /* qdivr_u */
+ 22, /* qdivi_u */
+ 24, /* remr */
+ 32, /* remi */
+ 24, /* remr_u */
+ 28, /* remi_u */
4, /* andr */
- 12, /* andi */
+ 12, /* andi */
4, /* orr */
- 12, /* ori */
+ 12, /* ori */
4, /* xorr */
- 12, /* xori */
+ 12, /* xori */
4, /* lshr */
4, /* lshi */
4, /* rshr */
@@ -92,98 +92,98 @@
4, /* negi */
4, /* comr */
4, /* comi */
- 14, /* ltr */
- 14, /* lti */
- 14, /* ltr_u */
- 14, /* lti_u */
- 14, /* ler */
- 14, /* lei */
- 14, /* ler_u */
- 14, /* lei_u */
- 14, /* eqr */
- 14, /* eqi */
- 14, /* ger */
- 14, /* gei */
- 14, /* ger_u */
- 14, /* gei_u */
- 14, /* gtr */
- 14, /* gti */
- 14, /* gtr_u */
- 14, /* gti_u */
- 14, /* ner */
- 14, /* nei */
+ 14, /* ltr */
+ 14, /* lti */
+ 14, /* ltr_u */
+ 14, /* lti_u */
+ 14, /* ler */
+ 14, /* lei */
+ 14, /* ler_u */
+ 14, /* lei_u */
+ 14, /* eqr */
+ 14, /* eqi */
+ 14, /* ger */
+ 14, /* gei */
+ 14, /* ger_u */
+ 14, /* gei_u */
+ 14, /* gtr */
+ 14, /* gti */
+ 14, /* gtr_u */
+ 14, /* gti_u */
+ 14, /* ner */
+ 14, /* nei */
4, /* movr */
8, /* movi */
8, /* movnr */
8, /* movzr */
- 42, /* casr */
- 50, /* casi */
- 4, /* extr_c */
+ 42, /* casr */
+ 46, /* casi */
+ 8, /* extr_c */
4, /* exti_c */
4, /* extr_uc */
4, /* exti_uc */
- 4, /* extr_s */
+ 8, /* extr_s */
4, /* exti_s */
- 4, /* extr_us */
+ 8, /* extr_us */
4, /* exti_us */
0, /* extr_i */
0, /* exti_i */
0, /* extr_ui */
0, /* exti_ui */
- 8, /* bswapr_us */
+ 20, /* bswapr_us */
4, /* bswapi_us */
- 4, /* bswapr_ui */
+ 16, /* bswapr_ui */
8, /* bswapi_ui */
0, /* bswapr_ul */
0, /* bswapi_ul */
- 8, /* htonr_us */
+ 20, /* htonr_us */
4, /* htoni_us */
- 4, /* htonr_ui */
+ 16, /* htonr_ui */
8, /* htoni_ui */
0, /* htonr_ul */
0, /* htoni_ul */
4, /* ldr_c */
- 12, /* ldi_c */
+ 12, /* ldi_c */
4, /* ldr_uc */
- 12, /* ldi_uc */
+ 12, /* ldi_uc */
4, /* ldr_s */
- 12, /* ldi_s */
+ 12, /* ldi_s */
4, /* ldr_us */
- 12, /* ldi_us */
+ 12, /* ldi_us */
4, /* ldr_i */
- 12, /* ldi_i */
+ 12, /* ldi_i */
0, /* ldr_ui */
0, /* ldi_ui */
0, /* ldr_l */
0, /* ldi_l */
4, /* ldxr_c */
- 12, /* ldxi_c */
+ 12, /* ldxi_c */
4, /* ldxr_uc */
- 12, /* ldxi_uc */
+ 12, /* ldxi_uc */
4, /* ldxr_s */
- 12, /* ldxi_s */
+ 12, /* ldxi_s */
4, /* ldxr_us */
- 12, /* ldxi_us */
+ 12, /* ldxi_us */
4, /* ldxr_i */
- 12, /* ldxi_i */
+ 12, /* ldxi_i */
0, /* ldxr_ui */
0, /* ldxi_ui */
0, /* ldxr_l */
0, /* ldxi_l */
4, /* str_c */
- 12, /* sti_c */
+ 12, /* sti_c */
4, /* str_s */
- 12, /* sti_s */
+ 12, /* sti_s */
4, /* str_i */
- 12, /* sti_i */
+ 12, /* sti_i */
0, /* str_l */
0, /* sti_l */
4, /* stxr_c */
- 12, /* stxi_c */
+ 12, /* stxi_c */
4, /* stxr_s */
- 12, /* stxi_s */
+ 12, /* stxi_s */
4, /* stxr_i */
- 12, /* stxi_i */
+ 12, /* stxi_i */
0, /* stxr_l */
0, /* stxi_l */
8, /* bltr */
@@ -195,7 +195,7 @@
8, /* bler_u */
8, /* blei_u */
8, /* beqr */
- 16, /* beqi */
+ 16, /* beqi */
8, /* bger */
8, /* bgei */
8, /* bger_u */
@@ -205,7 +205,7 @@
8, /* bgtr_u */
8, /* bgti_u */
8, /* bner */
- 16, /* bnei */
+ 16, /* bnei */
8, /* bmsr */
8, /* bmsi */
8, /* bmcr */
@@ -226,10 +226,10 @@
8, /* bxsubi */
8, /* bxsubr_u */
8, /* bxsubi_u */
- 4, /* jmpr */
- 8, /* jmpi */
+ 12, /* jmpr */
+ 72, /* jmpi */
4, /* callr */
- 20, /* calli */
+ 20, /* calli */
0, /* prepare */
0, /* pushargr_c */
0, /* pushargi_c */
@@ -269,96 +269,96 @@
0, /* retval_i */
0, /* retval_ui */
0, /* retval_l */
- 16, /* epilog */
+ 276, /* epilog */
0, /* arg_f */
0, /* getarg_f */
0, /* putargr_f */
0, /* putargi_f */
- 4, /* addr_f */
- 8, /* addi_f */
- 4, /* subr_f */
- 8, /* subi_f */
- 8, /* rsbi_f */
- 4, /* mulr_f */
- 8, /* muli_f */
- 4, /* divr_f */
- 8, /* divi_f */
- 4, /* negr_f */
+ 24, /* addr_f */
+ 24, /* addi_f */
+ 24, /* subr_f */
+ 24, /* subi_f */
+ 24, /* rsbi_f */
+ 24, /* mulr_f */
+ 24, /* muli_f */
+ 24, /* divr_f */
+ 24, /* divi_f */
+ 12, /* negr_f */
0, /* negi_f */
- 4, /* absr_f */
+ 12, /* absr_f */
0, /* absi_f */
- 4, /* sqrtr_f */
+ 20, /* sqrtr_f */
0, /* sqrti_f */
- 18, /* ltr_f */
- 30, /* lti_f */
- 20, /* ler_f */
- 32, /* lei_f */
- 18, /* eqr_f */
- 30, /* eqi_f */
- 18, /* ger_f */
- 30, /* gei_f */
- 18, /* gtr_f */
- 30, /* gti_f */
- 18, /* ner_f */
- 30, /* nei_f */
- 18, /* unltr_f */
- 30, /* unlti_f */
- 18, /* unler_f */
- 30, /* unlei_f */
- 24, /* uneqr_f */
- 36, /* uneqi_f */
- 18, /* unger_f */
- 30, /* ungei_f */
- 18, /* ungtr_f */
- 30, /* ungti_f */
- 24, /* ltgtr_f */
- 36, /* ltgti_f */
- 18, /* ordr_f */
- 30, /* ordi_f */
- 18, /* unordr_f */
- 30, /* unordi_f */
- 8, /* truncr_f_i */
+ 24, /* ltr_f */
+ 30, /* lti_f */
+ 24, /* ler_f */
+ 32, /* lei_f */
+ 24, /* eqr_f */
+ 30, /* eqi_f */
+ 24, /* ger_f */
+ 30, /* gei_f */
+ 24, /* gtr_f */
+ 30, /* gti_f */
+ 28, /* ner_f */
+ 32, /* nei_f */
+ 56, /* unltr_f */
+ 64, /* unlti_f */
+ 56, /* unler_f */
+ 64, /* unlei_f */
+ 56, /* uneqr_f */
+ 64, /* uneqi_f */
+ 56, /* unger_f */
+ 64, /* ungei_f */
+ 56, /* ungtr_f */
+ 64, /* ungti_f */
+ 60, /* ltgtr_f */
+ 68, /* ltgti_f */
+ 28, /* ordr_f */
+ 32, /* ordi_f */
+ 56, /* unordr_f */
+ 64, /* unordi_f */
+ 20, /* truncr_f_i */
0, /* truncr_f_l */
- 8, /* extr_f */
- 4, /* extr_d_f */
- 4, /* movr_f */
- 12, /* movi_f */
- 4, /* ldr_f */
- 12, /* ldi_f */
+ 28, /* extr_f */
+ 22, /* extr_d_f */
+ 8, /* movr_f */
+ 16, /* movi_f */
+ 8, /* ldr_f */
+ 16, /* ldi_f */
8, /* ldxr_f */
- 16, /* ldxi_f */
- 4, /* str_f */
- 12, /* sti_f */
+ 16, /* ldxi_f */
+ 8, /* str_f */
+ 16, /* sti_f */
8, /* stxr_f */
- 16, /* stxi_f */
- 12, /* bltr_f */
- 24, /* blti_f */
- 12, /* bler_f */
- 24, /* blei_f */
- 12, /* beqr_f */
- 24, /* beqi_f */
- 12, /* bger_f */
- 24, /* bgei_f */
- 12, /* bgtr_f */
- 24, /* bgti_f */
- 12, /* bner_f */
- 24, /* bnei_f */
- 16, /* bunltr_f */
- 28, /* bunlti_f */
- 16, /* bunler_f */
- 28, /* bunlei_f */
- 20, /* buneqr_f */
- 32, /* buneqi_f */
- 16, /* bunger_f */
- 28, /* bungei_f */
- 12, /* bungtr_f */
- 24, /* bungti_f */
- 20, /* bltgtr_f */
- 32, /* bltgti_f */
- 12, /* bordr_f */
- 24, /* bordi_f */
- 12, /* bunordr_f */
- 24, /* bunordi_f */
+ 16, /* stxi_f */
+ 28, /* bltr_f */
+ 32, /* blti_f */
+ 28, /* bler_f */
+ 32, /* blei_f */
+ 28, /* beqr_f */
+ 48, /* beqi_f */
+ 28, /* bger_f */
+ 32, /* bgei_f */
+ 28, /* bgtr_f */
+ 32, /* bgti_f */
+ 28, /* bner_f */
+ 32, /* bnei_f */
+ 28, /* bunltr_f */
+ 32, /* bunlti_f */
+ 28, /* bunler_f */
+ 32, /* bunlei_f */
+ 60, /* buneqr_f */
+ 68, /* buneqi_f */
+ 28, /* bunger_f */
+ 32, /* bungei_f */
+ 28, /* bungtr_f */
+ 32, /* bungti_f */
+ 60, /* bltgtr_f */
+ 68, /* bltgti_f */
+ 28, /* bordr_f */
+ 32, /* bordi_f */
+ 28, /* bunordr_f */
+ 32, /* bunordi_f */
0, /* pushargr_f */
0, /* pushargi_f */
0, /* retr_f */
@@ -368,91 +368,91 @@
0, /* getarg_d */
0, /* putargr_d */
0, /* putargi_d */
- 4, /* addr_d */
- 20, /* addi_d */
- 4, /* subr_d */
- 20, /* subi_d */
- 20, /* rsbi_d */
- 4, /* mulr_d */
- 20, /* muli_d */
- 4, /* divr_d */
- 20, /* divi_d */
- 4, /* negr_d */
+ 34, /* addr_d */
+ 36, /* addi_d */
+ 34, /* subr_d */
+ 36, /* subi_d */
+ 36, /* rsbi_d */
+ 34, /* mulr_d */
+ 36, /* muli_d */
+ 34, /* divr_d */
+ 36, /* divi_d */
+ 20, /* negr_d */
0, /* negi_d */
- 4, /* absr_d */
+ 20, /* absr_d */
0, /* absi_d */
- 4, /* sqrtr_d */
+ 26, /* sqrtr_d */
0, /* sqrti_d */
- 18, /* ltr_d */
- 34, /* lti_d */
- 20, /* ler_d */
- 36, /* lei_d */
- 18, /* eqr_d */
- 34, /* eqi_d */
- 18, /* ger_d */
- 34, /* gei_d */
- 18, /* gtr_d */
- 34, /* gti_d */
- 18, /* ner_d */
- 34, /* nei_d */
- 18, /* unltr_d */
- 34, /* unlti_d */
- 18, /* unler_d */
- 34, /* unlei_d */
- 24, /* uneqr_d */
- 40, /* uneqi_d */
- 18, /* unger_d */
- 34, /* ungei_d */
- 18, /* ungtr_d */
- 34, /* ungti_d */
- 24, /* ltgtr_d */
- 40, /* ltgti_d */
- 18, /* ordr_d */
- 34, /* ordi_d */
- 18, /* unordr_d */
- 34, /* unordi_d */
- 8, /* truncr_d_i */
+ 28, /* ltr_d */
+ 34, /* lti_d */
+ 28, /* ler_d */
+ 36, /* lei_d */
+ 28, /* eqr_d */
+ 34, /* eqi_d */
+ 28, /* ger_d */
+ 34, /* gei_d */
+ 28, /* gtr_d */
+ 34, /* gti_d */
+ 32, /* ner_d */
+ 36, /* nei_d */
+ 66, /* unltr_d */
+ 72, /* unlti_d */
+ 66, /* unler_d */
+ 72, /* unlei_d */
+ 66, /* uneqr_d */
+ 72, /* uneqi_d */
+ 66, /* unger_d */
+ 72, /* ungei_d */
+ 66, /* ungtr_d */
+ 72, /* ungti_d */
+ 70, /* ltgtr_d */
+ 76, /* ltgti_d */
+ 32, /* ordr_d */
+ 36, /* ordi_d */
+ 66, /* unordr_d */
+ 72, /* unordi_d */
+ 20, /* truncr_d_i */
0, /* truncr_d_l */
- 8, /* extr_d */
- 4, /* extr_f_d */
- 4, /* movr_d */
- 32, /* movi_d */
- 4, /* ldr_d */
- 12, /* ldi_d */
- 8, /* ldxr_d */
- 16, /* ldxi_d */
- 4, /* str_d */
- 12, /* sti_d */
- 8, /* stxr_d */
- 16, /* stxi_d */
- 12, /* bltr_d */
- 28, /* blti_d */
- 12, /* bler_d */
- 28, /* blei_d */
- 12, /* beqr_d */
- 36, /* beqi_d */
- 12, /* bger_d */
- 28, /* bgei_d */
- 12, /* bgtr_d */
- 28, /* bgti_d */
- 12, /* bner_d */
- 28, /* bnei_d */
- 16, /* bunltr_d */
- 32, /* bunlti_d */
- 16, /* bunler_d */
- 32, /* bunlei_d */
- 20, /* buneqr_d */
- 36, /* buneqi_d */
- 16, /* bunger_d */
- 32, /* bungei_d */
- 12, /* bungtr_d */
- 28, /* bungti_d */
- 20, /* bltgtr_d */
- 36, /* bltgti_d */
- 12, /* bordr_d */
- 28, /* bordi_d */
- 12, /* bunordr_d */
- 28, /* bunordi_d */
+ 36, /* extr_d */
+ 22, /* extr_f_d */
+ 16, /* movr_d */
+ 32, /* movi_d */
+ 16, /* ldr_d */
+ 24, /* ldi_d */
+ 20, /* ldxr_d */
+ 28, /* ldxi_d */
+ 16, /* str_d */
+ 24, /* sti_d */
+ 20, /* stxr_d */
+ 28, /* stxi_d */
+ 32, /* bltr_d */
+ 36, /* blti_d */
+ 32, /* bler_d */
+ 36, /* blei_d */
+ 32, /* beqr_d */
+ 52, /* beqi_d */
+ 32, /* bger_d */
+ 36, /* bgei_d */
+ 32, /* bgtr_d */
+ 36, /* bgti_d */
+ 32, /* bner_d */
+ 36, /* bnei_d */
+ 32, /* bunltr_d */
+ 36, /* bunlti_d */
+ 32, /* bunler_d */
+ 36, /* bunlei_d */
+ 68, /* buneqr_d */
+ 76, /* buneqi_d */
+ 32, /* bunger_d */
+ 36, /* bungei_d */
+ 32, /* bungtr_d */
+ 36, /* bungti_d */
+ 68, /* bltgtr_d */
+ 76, /* bltgti_d */
+ 32, /* bordr_d */
+ 36, /* bordi_d */
+ 32, /* bunordr_d */
+ 36, /* bunordi_d */
0, /* pushargr_d */
0, /* pushargi_d */
0, /* retr_d */
@@ -460,72 +460,76 @@
0, /* retval_d */
4, /* movr_w_f */
8, /* movi_w_f */
- 4, /* movr_ww_d */
- 16, /* movi_ww_d */
+ 8, /* movr_ww_d */
+ 20, /* movi_ww_d */
0, /* movr_w_d */
0, /* movi_w_d */
4, /* movr_f_w */
- 4, /* movi_f_w */
- 4, /* movr_d_ww */
- 12, /* movi_d_ww */
+ 8, /* movi_f_w */
+ 8, /* movr_d_ww */
+ 12, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
8, /* clor */
4, /* cloi */
4, /* clzr */
4, /* clzi */
- 12, /* ctor */
+ 12, /* ctor */
4, /* ctoi */
8, /* ctzr */
4, /* ctzi */
4, /* rbitr */
8, /* rbiti */
- 40, /* popcntr */
+ 40, /* popcntr */
4, /* popcnti */
- 12, /* lrotr */
+ 12, /* lrotr */
4, /* lroti */
4, /* rrotr */
4, /* rroti */
- 4, /* extr */
+ 8, /* extr */
4, /* exti */
- 4, /* extr_u */
+ 12, /* extr_u */
4, /* exti_u */
- 4, /* depr */
- 8, /* depi */
- 50, /* qlshr */
+ 24, /* depr */
+ 20, /* depi */
+ 50, /* qlshr */
8, /* qlshi */
- 50, /* qlshr_u */
+ 50, /* qlshr_u */
8, /* qlshi_u */
- 50, /* qrshr */
+ 50, /* qrshr */
8, /* qrshi */
- 50, /* qrshr_u */
+ 50, /* qrshr_u */
8, /* qrshi_u */
- 72, /* unldr */
- 44, /* unldi */
- 72, /* unldr_u */
- 44, /* unldi_u */
- 68, /* unstr */
- 44, /* unsti */
- 140, /* unldr_x */
- 76, /* unldi_x */
- 144, /* unstr_x */
- 92, /* unsti_x */
- 8, /* fmar_f */
+ 72, /* unldr */
+ 44, /* unldi */
+ 72, /* unldr_u */
+ 44, /* unldi_u */
+ 68, /* unstr */
+ 44, /* unsti */
+ 144, /* unldr_x */
+ 80, /* unldi_x */
+ 148, /* unstr_x */
+ 96, /* unsti_x */
+ 48, /* fmar_f */
0, /* fmai_f */
- 8, /* fmsr_f */
+ 48, /* fmsr_f */
0, /* fmsi_f */
- 8, /* fmar_d */
+ 68, /* fmar_d */
0, /* fmai_d */
- 8, /* fmsr_d */
+ 68, /* fmsr_d */
0, /* fmsi_d */
- 12, /* fnmar_f */
+ 60, /* fnmar_f */
0, /* fnmai_f */
- 12, /* fnmsr_f */
+ 60, /* fnmsr_f */
0, /* fnmsi_f */
- 12, /* fnmar_d */
+ 88, /* fnmar_d */
0, /* fnmai_d */
- 12, /* fnmsr_d */
+ 88, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 12, /* hmuli */
+ 4, /* hmulr_u */
+ 8, /* hmuli_u */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
@@ -534,14 +538,14 @@
#define JIT_INSTR_MAX 144
0, /* data */
0, /* live */
- 20, /* align */
+ 20, /* align */
0, /* save */
0, /* load */
4, /* skip */
2, /* #name */
0, /* #note */
0, /* label */
- 30, /* prolog */
+ 26, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
@@ -573,45 +577,45 @@
0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
- 28, /* va_arg_d */
+ 16, /* va_arg_d */
0, /* va_end */
4, /* addr */
- 12, /* addi */
+ 12, /* addi */
4, /* addcr */
8, /* addci */
4, /* addxr */
4, /* addxi */
4, /* subr */
- 12, /* subi */
+ 12, /* subi */
4, /* subcr */
8, /* subci */
4, /* subxr */
4, /* subxi */
- 16, /* rsbi */
- 8, /* mulr */
- 12, /* muli */
+ 16, /* rsbi */
+ 4, /* mulr */
+ 12, /* muli */
4, /* qmulr */
- 12, /* qmuli */
+ 12, /* qmuli */
4, /* qmulr_u */
8, /* qmuli_u */
- 32, /* divr */
- 36, /* divi */
- 24, /* divr_u */
- 28, /* divi_u */
- 18, /* qdivr */
- 22, /* qdivi */
- 18, /* qdivr_u */
- 22, /* qdivi_u */
- 24, /* remr */
- 32, /* remi */
- 24, /* remr_u */
- 28, /* remi_u */
+ 32, /* divr */
+ 36, /* divi */
+ 24, /* divr_u */
+ 28, /* divi_u */
+ 18, /* qdivr */
+ 22, /* qdivi */
+ 18, /* qdivr_u */
+ 22, /* qdivi_u */
+ 24, /* remr */
+ 32, /* remi */
+ 24, /* remr_u */
+ 28, /* remi_u */
4, /* andr */
- 12, /* andi */
+ 12, /* andi */
4, /* orr */
- 12, /* ori */
+ 12, /* ori */
4, /* xorr */
- 12, /* xori */
+ 12, /* xori */
4, /* lshr */
4, /* lshi */
4, /* rshr */
@@ -622,98 +626,98 @@
4, /* negi */
4, /* comr */
4, /* comi */
- 14, /* ltr */
- 14, /* lti */
- 14, /* ltr_u */
- 14, /* lti_u */
- 14, /* ler */
- 14, /* lei */
- 14, /* ler_u */
- 14, /* lei_u */
- 14, /* eqr */
- 14, /* eqi */
- 14, /* ger */
- 14, /* gei */
- 14, /* ger_u */
- 14, /* gei_u */
- 14, /* gtr */
- 14, /* gti */
- 14, /* gtr_u */
- 14, /* gti_u */
- 14, /* ner */
- 14, /* nei */
+ 14, /* ltr */
+ 14, /* lti */
+ 14, /* ltr_u */
+ 14, /* lti_u */
+ 14, /* ler */
+ 14, /* lei */
+ 14, /* ler_u */
+ 14, /* lei_u */
+ 14, /* eqr */
+ 14, /* eqi */
+ 14, /* ger */
+ 14, /* gei */
+ 14, /* ger_u */
+ 14, /* gei_u */
+ 14, /* gtr */
+ 14, /* gti */
+ 14, /* gtr_u */
+ 14, /* gti_u */
+ 14, /* ner */
+ 14, /* nei */
4, /* movr */
8, /* movi */
8, /* movnr */
8, /* movzr */
- 42, /* casr */
- 46, /* casi */
- 8, /* extr_c */
+ 42, /* casr */
+ 50, /* casi */
+ 4, /* extr_c */
4, /* exti_c */
4, /* extr_uc */
4, /* exti_uc */
- 8, /* extr_s */
+ 4, /* extr_s */
4, /* exti_s */
- 8, /* extr_us */
+ 4, /* extr_us */
4, /* exti_us */
0, /* extr_i */
0, /* exti_i */
0, /* extr_ui */
0, /* exti_ui */
- 20, /* bswapr_us */
+ 8, /* bswapr_us */
4, /* bswapi_us */
- 16, /* bswapr_ui */
+ 4, /* bswapr_ui */
8, /* bswapi_ui */
0, /* bswapr_ul */
0, /* bswapi_ul */
- 20, /* htonr_us */
+ 8, /* htonr_us */
4, /* htoni_us */
- 16, /* htonr_ui */
+ 4, /* htonr_ui */
8, /* htoni_ui */
0, /* htonr_ul */
0, /* htoni_ul */
4, /* ldr_c */
- 12, /* ldi_c */
+ 12, /* ldi_c */
4, /* ldr_uc */
- 12, /* ldi_uc */
+ 12, /* ldi_uc */
4, /* ldr_s */
- 12, /* ldi_s */
+ 12, /* ldi_s */
4, /* ldr_us */
- 12, /* ldi_us */
+ 12, /* ldi_us */
4, /* ldr_i */
- 12, /* ldi_i */
+ 12, /* ldi_i */
0, /* ldr_ui */
0, /* ldi_ui */
0, /* ldr_l */
0, /* ldi_l */
4, /* ldxr_c */
- 12, /* ldxi_c */
+ 12, /* ldxi_c */
4, /* ldxr_uc */
- 12, /* ldxi_uc */
+ 12, /* ldxi_uc */
4, /* ldxr_s */
- 12, /* ldxi_s */
+ 12, /* ldxi_s */
4, /* ldxr_us */
- 12, /* ldxi_us */
+ 12, /* ldxi_us */
4, /* ldxr_i */
- 12, /* ldxi_i */
+ 12, /* ldxi_i */
0, /* ldxr_ui */
0, /* ldxi_ui */
0, /* ldxr_l */
0, /* ldxi_l */
4, /* str_c */
- 12, /* sti_c */
+ 12, /* sti_c */
4, /* str_s */
- 12, /* sti_s */
+ 12, /* sti_s */
4, /* str_i */
- 12, /* sti_i */
+ 12, /* sti_i */
0, /* str_l */
0, /* sti_l */
4, /* stxr_c */
- 12, /* stxi_c */
+ 12, /* stxi_c */
4, /* stxr_s */
- 12, /* stxi_s */
+ 12, /* stxi_s */
4, /* stxr_i */
- 12, /* stxi_i */
+ 12, /* stxi_i */
0, /* stxr_l */
0, /* stxi_l */
8, /* bltr */
@@ -725,7 +729,7 @@
8, /* bler_u */
8, /* blei_u */
8, /* beqr */
- 16, /* beqi */
+ 16, /* beqi */
8, /* bger */
8, /* bgei */
8, /* bger_u */
@@ -735,7 +739,7 @@
8, /* bgtr_u */
8, /* bgti_u */
8, /* bner */
- 16, /* bnei */
+ 16, /* bnei */
8, /* bmsr */
8, /* bmsi */
8, /* bmcr */
@@ -756,10 +760,10 @@
8, /* bxsubi */
8, /* bxsubr_u */
8, /* bxsubi_u */
- 12, /* jmpr */
- 72, /* jmpi */
+ 4, /* jmpr */
+ 8, /* jmpi */
4, /* callr */
- 20, /* calli */
+ 20, /* calli */
0, /* prepare */
0, /* pushargr_c */
0, /* pushargi_c */
@@ -804,91 +808,91 @@
0, /* getarg_f */
0, /* putargr_f */
0, /* putargi_f */
- 24, /* addr_f */
- 24, /* addi_f */
- 24, /* subr_f */
- 24, /* subi_f */
- 24, /* rsbi_f */
- 24, /* mulr_f */
- 24, /* muli_f */
- 24, /* divr_f */
- 24, /* divi_f */
- 12, /* negr_f */
+ 4, /* addr_f */
+ 8, /* addi_f */
+ 4, /* subr_f */
+ 8, /* subi_f */
+ 8, /* rsbi_f */
+ 4, /* mulr_f */
+ 8, /* muli_f */
+ 4, /* divr_f */
+ 8, /* divi_f */
+ 4, /* negr_f */
0, /* negi_f */
- 12, /* absr_f */
+ 4, /* absr_f */
0, /* absi_f */
- 20, /* sqrtr_f */
+ 4, /* sqrtr_f */
0, /* sqrti_f */
- 24, /* ltr_f */
- 30, /* lti_f */
- 24, /* ler_f */
- 32, /* lei_f */
- 24, /* eqr_f */
- 30, /* eqi_f */
- 24, /* ger_f */
- 30, /* gei_f */
- 24, /* gtr_f */
- 30, /* gti_f */
- 28, /* ner_f */
- 32, /* nei_f */
- 56, /* unltr_f */
- 64, /* unlti_f */
- 56, /* unler_f */
- 64, /* unlei_f */
- 56, /* uneqr_f */
- 64, /* uneqi_f */
- 56, /* unger_f */
- 64, /* ungei_f */
- 56, /* ungtr_f */
- 64, /* ungti_f */
- 60, /* ltgtr_f */
- 68, /* ltgti_f */
- 28, /* ordr_f */
- 32, /* ordi_f */
- 56, /* unordr_f */
- 64, /* unordi_f */
- 20, /* truncr_f_i */
+ 18, /* ltr_f */
+ 30, /* lti_f */
+ 20, /* ler_f */
+ 32, /* lei_f */
+ 18, /* eqr_f */
+ 30, /* eqi_f */
+ 18, /* ger_f */
+ 30, /* gei_f */
+ 18, /* gtr_f */
+ 30, /* gti_f */
+ 18, /* ner_f */
+ 30, /* nei_f */
+ 18, /* unltr_f */
+ 30, /* unlti_f */
+ 18, /* unler_f */
+ 30, /* unlei_f */
+ 24, /* uneqr_f */
+ 36, /* uneqi_f */
+ 18, /* unger_f */
+ 30, /* ungei_f */
+ 18, /* ungtr_f */
+ 30, /* ungti_f */
+ 24, /* ltgtr_f */
+ 36, /* ltgti_f */
+ 18, /* ordr_f */
+ 30, /* ordi_f */
+ 18, /* unordr_f */
+ 30, /* unordi_f */
+ 8, /* truncr_f_i */
0, /* truncr_f_l */
- 28, /* extr_f */
- 22, /* extr_d_f */
- 8, /* movr_f */
- 16, /* movi_f */
- 8, /* ldr_f */
- 16, /* ldi_f */
+ 8, /* extr_f */
+ 4, /* extr_d_f */
+ 4, /* movr_f */
+ 12, /* movi_f */
+ 4, /* ldr_f */
+ 12, /* ldi_f */
8, /* ldxr_f */
- 16, /* ldxi_f */
- 8, /* str_f */
- 16, /* sti_f */
+ 16, /* ldxi_f */
+ 4, /* str_f */
+ 12, /* sti_f */
8, /* stxr_f */
- 16, /* stxi_f */
- 28, /* bltr_f */
- 32, /* blti_f */
- 28, /* bler_f */
- 32, /* blei_f */
- 28, /* beqr_f */
- 48, /* beqi_f */
- 28, /* bger_f */
- 32, /* bgei_f */
- 28, /* bgtr_f */
- 32, /* bgti_f */
- 28, /* bner_f */
- 32, /* bnei_f */
- 28, /* bunltr_f */
- 32, /* bunlti_f */
- 28, /* bunler_f */
- 32, /* bunlei_f */
- 60, /* buneqr_f */
- 68, /* buneqi_f */
- 28, /* bunger_f */
- 32, /* bungei_f */
- 28, /* bungtr_f */
- 32, /* bungti_f */
- 60, /* bltgtr_f */
- 68, /* bltgti_f */
- 28, /* bordr_f */
- 32, /* bordi_f */
- 28, /* bunordr_f */
- 32, /* bunordi_f */
+ 16, /* stxi_f */
+ 12, /* bltr_f */
+ 24, /* blti_f */
+ 12, /* bler_f */
+ 24, /* blei_f */
+ 12, /* beqr_f */
+ 24, /* beqi_f */
+ 12, /* bger_f */
+ 24, /* bgei_f */
+ 12, /* bgtr_f */
+ 24, /* bgti_f */
+ 12, /* bner_f */
+ 24, /* bnei_f */
+ 16, /* bunltr_f */
+ 28, /* bunlti_f */
+ 16, /* bunler_f */
+ 28, /* bunlei_f */
+ 20, /* buneqr_f */
+ 32, /* buneqi_f */
+ 16, /* bunger_f */
+ 28, /* bungei_f */
+ 12, /* bungtr_f */
+ 24, /* bungti_f */
+ 20, /* bltgtr_f */
+ 32, /* bltgti_f */
+ 12, /* bordr_f */
+ 24, /* bordi_f */
+ 12, /* bunordr_f */
+ 24, /* bunordi_f */
0, /* pushargr_f */
0, /* pushargi_f */
0, /* retr_f */
@@ -898,91 +902,91 @@
0, /* getarg_d */
0, /* putargr_d */
0, /* putargi_d */
- 34, /* addr_d */
- 36, /* addi_d */
- 34, /* subr_d */
- 36, /* subi_d */
- 36, /* rsbi_d */
- 34, /* mulr_d */
- 36, /* muli_d */
- 34, /* divr_d */
- 36, /* divi_d */
- 20, /* negr_d */
+ 4, /* addr_d */
+ 20, /* addi_d */
+ 4, /* subr_d */
+ 20, /* subi_d */
+ 20, /* rsbi_d */
+ 4, /* mulr_d */
+ 20, /* muli_d */
+ 4, /* divr_d */
+ 20, /* divi_d */
+ 4, /* negr_d */
0, /* negi_d */
- 20, /* absr_d */
+ 4, /* absr_d */
0, /* absi_d */
- 26, /* sqrtr_d */
+ 4, /* sqrtr_d */
0, /* sqrti_d */
- 28, /* ltr_d */
- 34, /* lti_d */
- 28, /* ler_d */
- 36, /* lei_d */
- 28, /* eqr_d */
- 34, /* eqi_d */
- 28, /* ger_d */
- 34, /* gei_d */
- 28, /* gtr_d */
- 34, /* gti_d */
- 32, /* ner_d */
- 36, /* nei_d */
- 66, /* unltr_d */
- 72, /* unlti_d */
- 66, /* unler_d */
- 72, /* unlei_d */
- 66, /* uneqr_d */
- 72, /* uneqi_d */
- 66, /* unger_d */
- 72, /* ungei_d */
- 66, /* ungtr_d */
- 72, /* ungti_d */
- 70, /* ltgtr_d */
- 76, /* ltgti_d */
- 32, /* ordr_d */
- 36, /* ordi_d */
- 66, /* unordr_d */
- 72, /* unordi_d */
- 20, /* truncr_d_i */
+ 18, /* ltr_d */
+ 34, /* lti_d */
+ 20, /* ler_d */
+ 36, /* lei_d */
+ 18, /* eqr_d */
+ 34, /* eqi_d */
+ 18, /* ger_d */
+ 34, /* gei_d */
+ 18, /* gtr_d */
+ 34, /* gti_d */
+ 18, /* ner_d */
+ 34, /* nei_d */
+ 18, /* unltr_d */
+ 34, /* unlti_d */
+ 18, /* unler_d */
+ 34, /* unlei_d */
+ 24, /* uneqr_d */
+ 40, /* uneqi_d */
+ 18, /* unger_d */
+ 34, /* ungei_d */
+ 18, /* ungtr_d */
+ 34, /* ungti_d */
+ 24, /* ltgtr_d */
+ 40, /* ltgti_d */
+ 18, /* ordr_d */
+ 34, /* ordi_d */
+ 18, /* unordr_d */
+ 34, /* unordi_d */
+ 8, /* truncr_d_i */
0, /* truncr_d_l */
- 36, /* extr_d */
- 22, /* extr_f_d */
- 16, /* movr_d */
- 32, /* movi_d */
- 16, /* ldr_d */
- 24, /* ldi_d */
- 20, /* ldxr_d */
- 28, /* ldxi_d */
- 16, /* str_d */
- 24, /* sti_d */
- 20, /* stxr_d */
- 28, /* stxi_d */
- 32, /* bltr_d */
- 36, /* blti_d */
- 32, /* bler_d */
- 36, /* blei_d */
- 32, /* beqr_d */
- 52, /* beqi_d */
- 32, /* bger_d */
- 36, /* bgei_d */
- 32, /* bgtr_d */
- 36, /* bgti_d */
- 32, /* bner_d */
- 36, /* bnei_d */
- 32, /* bunltr_d */
- 36, /* bunlti_d */
- 32, /* bunler_d */
- 36, /* bunlei_d */
- 68, /* buneqr_d */
- 76, /* buneqi_d */
- 32, /* bunger_d */
- 36, /* bungei_d */
- 32, /* bungtr_d */
- 36, /* bungti_d */
- 68, /* bltgtr_d */
- 76, /* bltgti_d */
- 32, /* bordr_d */
- 36, /* bordi_d */
- 32, /* bunordr_d */
- 36, /* bunordi_d */
+ 8, /* extr_d */
+ 4, /* extr_f_d */
+ 4, /* movr_d */
+ 32, /* movi_d */
+ 4, /* ldr_d */
+ 12, /* ldi_d */
+ 8, /* ldxr_d */
+ 16, /* ldxi_d */
+ 4, /* str_d */
+ 12, /* sti_d */
+ 8, /* stxr_d */
+ 16, /* stxi_d */
+ 12, /* bltr_d */
+ 28, /* blti_d */
+ 12, /* bler_d */
+ 28, /* blei_d */
+ 12, /* beqr_d */
+ 36, /* beqi_d */
+ 12, /* bger_d */
+ 28, /* bgei_d */
+ 12, /* bgtr_d */
+ 28, /* bgti_d */
+ 12, /* bner_d */
+ 28, /* bnei_d */
+ 16, /* bunltr_d */
+ 32, /* bunlti_d */
+ 16, /* bunler_d */
+ 32, /* bunlei_d */
+ 20, /* buneqr_d */
+ 36, /* buneqi_d */
+ 16, /* bunger_d */
+ 32, /* bungei_d */
+ 12, /* bungtr_d */
+ 28, /* bungti_d */
+ 20, /* bltgtr_d */
+ 36, /* bltgti_d */
+ 12, /* bordr_d */
+ 28, /* bordi_d */
+ 12, /* bunordr_d */
+ 28, /* bunordi_d */
0, /* pushargr_d */
0, /* pushargi_d */
0, /* retr_d */
@@ -990,71 +994,75 @@
0, /* retval_d */
4, /* movr_w_f */
8, /* movi_w_f */
- 8, /* movr_ww_d */
- 20, /* movi_ww_d */
+ 4, /* movr_ww_d */
+ 16, /* movi_ww_d */
0, /* movr_w_d */
0, /* movi_w_d */
4, /* movr_f_w */
- 8, /* movi_f_w */
- 8, /* movr_d_ww */
- 12, /* movi_d_ww */
+ 4, /* movi_f_w */
+ 4, /* movr_d_ww */
+ 12, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
8, /* clor */
4, /* cloi */
4, /* clzr */
4, /* clzi */
- 12, /* ctor */
+ 12, /* ctor */
4, /* ctoi */
8, /* ctzr */
4, /* ctzi */
4, /* rbitr */
8, /* rbiti */
- 40, /* popcntr */
+ 40, /* popcntr */
4, /* popcnti */
- 12, /* lrotr */
+ 12, /* lrotr */
4, /* lroti */
4, /* rrotr */
4, /* rroti */
- 8, /* extr */
+ 4, /* extr */
4, /* exti */
- 12, /* extr_u */
+ 4, /* extr_u */
4, /* exti_u */
- 24, /* depr */
- 20, /* depi */
- 50, /* qlshr */
+ 4, /* depr */
+ 8, /* depi */
+ 50, /* qlshr */
8, /* qlshi */
- 50, /* qlshr_u */
+ 50, /* qlshr_u */
8, /* qlshi_u */
- 50, /* qrshr */
+ 50, /* qrshr */
8, /* qrshi */
- 50, /* qrshr_u */
+ 50, /* qrshr_u */
8, /* qrshi_u */
- 72, /* unldr */
- 44, /* unldi */
- 72, /* unldr_u */
- 44, /* unldi_u */
- 68, /* unstr */
- 44, /* unsti */
+ 72, /* unldr */
+ 44, /* unldi */
+ 72, /* unldr_u */
+ 44, /* unldi_u */
+ 68, /* unstr */
+ 44, /* unsti */
140, /* unldr_x */
- 76, /* unldi_x */
+ 76, /* unldi_x */
144, /* unstr_x */
- 96, /* unsti_x */
- 48, /* fmar_f */
+ 92, /* unsti_x */
+ 8, /* fmar_f */
0, /* fmai_f */
- 48, /* fmsr_f */
+ 8, /* fmsr_f */
0, /* fmsi_f */
- 68, /* fmar_d */
+ 8, /* fmar_d */
0, /* fmai_d */
- 68, /* fmsr_d */
+ 8, /* fmsr_d */
0, /* fmsi_d */
- 60, /* fnmar_f */
+ 12, /* fnmar_f */
0, /* fnmai_f */
- 60, /* fnmsr_f */
+ 12, /* fnmsr_f */
0, /* fnmsi_f */
- 88, /* fnmar_d */
+ 12, /* fnmar_d */
0, /* fnmai_d */
- 88, /* fnmsr_d */
+ 12, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 12, /* hmuli */
+ 4, /* hmulr_u */
+ 8, /* hmuli_u */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index df6c0e7..25aa7cb 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -1573,6 +1573,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c
index bb6e540..4db79d8 100644
--- a/lib/jit_hppa-cpu.c
+++ b/lib/jit_hppa-cpu.c
@@ -707,6 +707,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2) /* hmul reuses qmul: JIT_NOREG as first destination means that result is not stored */
+#define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0) /* immediate form, same JIT_NOREG convention */
+#define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2) /* _u variant through qmulr_u */
+#define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0) /* _u immediate variant through qmuli_u */
static long long __llmul(int, int);
#define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,
@@ -1939,7 +1943,8 @@ _qmulr(jit_state_t *_jit,
movr(_R26_REGNO, r2);
movr(_R25_REGNO, r3);
calli((jit_word_t)__llmul);
- movr(r0, _R29_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _R29_REGNO);
movr(r1, _R28_REGNO);
}
@@ -1950,7 +1955,8 @@ _qmuli(jit_state_t *_jit,
movr(_R26_REGNO, r2);
movi(_R25_REGNO, i0);
calli((jit_word_t)__llmul);
- movr(r0, _R29_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _R29_REGNO);
movr(r1, _R28_REGNO);
}
@@ -1967,7 +1973,8 @@ _qmulr_u(jit_state_t *_jit,
ldxi_f(rn(t1), _FP_REGNO, alloca_offset - 8);
XMPYU(rn(t0), rn(t1), rn(t0));
stxi_d(alloca_offset - 8, _FP_REGNO, rn(t0));
- ldxi(r0, _FP_REGNO, alloca_offset - 4);
+ if (r0 != JIT_NOREG)
+ ldxi(r0, _FP_REGNO, alloca_offset - 4);
ldxi(r1, _FP_REGNO, alloca_offset - 8);
jit_unget_reg(t1);
jit_unget_reg(t0);
diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c
index c460264..7a23ebd 100644
--- a/lib/jit_hppa-sz.c
+++ b/lib/jit_hppa-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
12, /* fnmsr_d */
0, /* fnmsi_d */
+ 36, /* hmulr */
+ 40, /* hmuli */
+ 48, /* hmulr_u */
+ 56, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c
index b6b1f59..6330bf6 100644
--- a/lib/jit_hppa.c
+++ b/lib/jit_hppa.c
@@ -1024,6 +1024,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
index 4460940..a4ec58d 100644
--- a/lib/jit_ia64-cpu.c
+++ b/lib/jit_ia64-cpu.c
@@ -1206,6 +1206,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define hmulr(r0,r1,r2) mulh(r0,r1,r2,1)
+#define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define hmulr_u(r0,r1,r2) mulh(r0,r1,r2,0)
+#define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
#define divr(r0,r1,r2) _divr(_jit,r0,r1,r2)
static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define divi(r0,r1,i0) _divi(_jit,r0,r1,i0)
@@ -3890,6 +3896,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
jit_unget_reg(reg);
}
+static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* hmul with an immediate operand: i0 is loaded into a scratch register and hmulr(r0, r1, scratch) is emitted */
+ jit_int32_t reg; /* handle of the scratch register */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(reg), i0); /* materialize the immediate */
+ hmulr(r0, r1, rn(reg)); /* delegate to the register-register form */
+ jit_unget_reg(reg); /* give the scratch register back */
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* _u counterpart of hmuli: i0 is loaded into a scratch register and hmulr_u(r0, r1, scratch) is emitted */
+ jit_int32_t reg; /* handle of the scratch register */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(reg), i0); /* materialize the immediate */
+ hmulr_u(r0, r1, rn(reg)); /* delegate to the register-register _u form */
+ jit_unget_reg(reg); /* give the scratch register back */
+}
+
#if !defined(__GNUC__)
static long
__divdi3(long u, long v)
diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c
index 2a8feaf..9033334 100644
--- a/lib/jit_ia64-sz.c
+++ b/lib/jit_ia64-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
16, /* fnmsr_d */
0, /* fnmsi_d */
+ 32, /* hmulr */
+ 32, /* hmuli */
+ 32, /* hmulr_u */
+ 32, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c
index d385e8d..f689231 100644
--- a/lib/jit_ia64.c
+++ b/lib/jit_ia64.c
@@ -1171,6 +1171,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(subc,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_loongarch-cpu.c b/lib/jit_loongarch-cpu.c
index 46e8ce7..22ca0f0 100644
--- a/lib/jit_loongarch-cpu.c
+++ b/lib/jit_loongarch-cpu.c
@@ -386,6 +386,12 @@ static void _rsbi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define mulr(r0, r1, r2) MUL_D(r0, r1, r2)
# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr(r0, r1, r2) MULH_D(r0, r1, r2)
+# define hmuli(r0, r1, i0) _hmuli(_jit, r0, r1, i0)
+static void _hmuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr_u(r0, r1, r2) MULH_DU(r0, r1, r2)
+# define hmuli_u(r0, r1, i0) _hmuli_u(_jit, r0, r1, i0)
+static void _hmuli_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define qmulr(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 1)
# define qmulr_u(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 0)
# define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
@@ -1244,6 +1250,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* hmul with an immediate operand: i0 is loaded into a scratch register and hmulr(r0, r1, scratch) is emitted */
+ jit_int32_t reg; /* handle of the scratch register */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(reg), i0); /* materialize the immediate */
+ hmulr(r0, r1, rn(reg)); /* delegate to the register-register form */
+ jit_unget_reg(reg); /* give the scratch register back */
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* _u counterpart of hmuli: i0 is loaded into a scratch register and hmulr_u(r0, r1, scratch) is emitted */
+ jit_int32_t reg; /* handle of the scratch register */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(reg), i0); /* materialize the immediate */
+ hmulr_u(r0, r1, rn(reg)); /* delegate to the register-register _u form */
+ jit_unget_reg(reg); /* give the scratch register back */
+}
+
+static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
diff --git a/lib/jit_loongarch-sz.c b/lib/jit_loongarch-sz.c
index 377dbc0..18e73aa 100644
--- a/lib/jit_loongarch-sz.c
+++ b/lib/jit_loongarch-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 20, /* hmuli */
+ 4, /* hmulr_u */
+ 20, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_loongarch.c b/lib/jit_loongarch.c
index b892563..cd38c4e 100644
--- a/lib/jit_loongarch.c
+++ b/lib/jit_loongarch.c
@@ -1098,6 +1098,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 7a3b600..37031c4 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -588,6 +588,14 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0,r1,r2) _hmulr(_jit,r0,r1,r2)
+static void _hmulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr_u(r0,r1,r2) _hmulr_u(_jit,r0,r1,r2)
+static void _hmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1)
# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0)
# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc)
@@ -2085,6 +2093,48 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* register-register hmul: r0 receives the result of multiplying r1 by r2 */
+ if (jit_mips6_p()) /* NOTE(review): presumably true on MIPS release 6 targets - confirm */
+ muh_r6(r0, r1, r2); /* one instruction that writes r0 directly */
+ else { /* otherwise multiply first, then read the result back */
+ mult(r1, r2);
+ MFHI(r0); /* fetch the HI part of the product into r0 */
+ }
+}
+
+static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* hmul with an immediate operand: i0 is loaded into a scratch register and hmulr(r0, r1, scratch) is emitted */
+ jit_int32_t reg; /* handle of the scratch register */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(reg), i0); /* materialize the immediate */
+ hmulr(r0, r1, rn(reg)); /* delegate to the register-register form */
+ jit_unget_reg(reg); /* give the scratch register back */
+}
+
+static void
+_hmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* _u counterpart of hmulr: r0 receives the result of multiplying r1 by r2 */
+ if (jit_mips6_p()) /* NOTE(review): presumably true on MIPS release 6 targets - confirm */
+ muhu_r6(r0, r1, r2); /* one instruction that writes r0 directly */
+ else { /* otherwise multiply first, then read the result back */
+ multu(r1, r2);
+ MFHI(r0); /* fetch the HI part of the product into r0 */
+ }
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* _u counterpart of hmuli: i0 is loaded into a scratch register and hmulr_u(r0, r1, scratch) is emitted */
+ jit_int32_t reg; /* handle of the scratch register */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(reg), i0); /* materialize the immediate */
+ hmulr_u(r0, r1, rn(reg)); /* delegate to the register-register _u form */
+ jit_unget_reg(reg); /* give the scratch register back */
+}
+
+static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c
index 8c5cc52..156fc95 100644
--- a/lib/jit_mips-sz.c
+++ b/lib/jit_mips-sz.c
@@ -525,6 +525,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 8, /* hmulr */
+ 16, /* hmuli */
+ 8, /* hmulr_u */
+ 16, /* hmuli_u */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
@@ -1053,4 +1057,8 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 8, /* hmulr */
+ 28, /* hmuli */
+ 8, /* hmulr_u */
+ 28, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 50cccf1..1fec109 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1551,6 +1551,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_names.c b/lib/jit_names.c
index 52f1eef..88bc717 100644
--- a/lib/jit_names.c
+++ b/lib/jit_names.c
@@ -289,4 +289,6 @@ static char *code_name[] = {
"fnmsr_f", "fnmsi_f",
"fnmar_d", "fnmai_d",
"fnmsr_d", "fnmsi_d",
+ "hmulr", "hmuli",
+ "hmulr_u", "hmuli_u",
};
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
index 5bd1aa3..f84998b 100644
--- a/lib/jit_ppc-cpu.c
+++ b/lib/jit_ppc-cpu.c
@@ -600,16 +600,20 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# if __WORDSIZE == 32
# define mulr(r0,r1,r2) MULLW(r0,r1,r2)
# define mullr(r0,r1,r2) MULLW(r0,r1,r2)
-# define mulhr(r0,r1,r2) MULHW(r0,r1,r2)
-# define mulhr_u(r0,r1,r2) MULHWU(r0,r1,r2)
+# define hmulr(r0,r1,r2) MULHW(r0,r1,r2)
+# define hmulr_u(r0,r1,r2) MULHWU(r0,r1,r2)
# else
# define mulr(r0,r1,r2) MULLD(r0,r1,r2)
# define mullr(r0,r1,r2) MULLD(r0,r1,r2)
-# define mulhr(r0,r1,r2) MULHD(r0,r1,r2)
-# define mulhr_u(r0,r1,r2) MULHDU(r0,r1,r2)
+# define hmulr(r0,r1,r2) MULHD(r0,r1,r2)
+# define hmulr_u(r0,r1,r2) MULHDU(r0,r1,r2)
# endif
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1)
# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0)
# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc)
@@ -1535,6 +1539,9 @@ static void
_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
+ /* NOTE verified and overflow is correctly computed.
+ * No need to check for __WORDSIZE == 32.
+ * Documented as a 32 bit instruction. */
if (can_sign_extend_short_p(i0))
MULLI(r0, r1, i0);
else {
@@ -1546,6 +1553,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* hmul with an immediate operand: i0 is loaded into a scratch register and hmulr(r0, r1, scratch) is emitted */
+ jit_int32_t reg; /* handle of the scratch register */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(reg), i0); /* materialize the immediate */
+ hmulr(r0, r1, rn(reg)); /* delegate to the register-register form */
+ jit_unget_reg(reg); /* give the scratch register back */
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* _u counterpart of hmuli: i0 is loaded into a scratch register and hmulr_u(r0, r1, scratch) is emitted */
+ jit_int32_t reg; /* handle of the scratch register */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(reg), i0); /* materialize the immediate */
+ hmulr_u(r0, r1, rn(reg)); /* delegate to the register-register _u form */
+ jit_unget_reg(reg); /* give the scratch register back */
+}
+
+static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
@@ -1557,9 +1584,9 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
else
mullr(r0, r2, r3);
if (sign)
- mulhr(r1, r2, r3);
+ hmulr(r1, r2, r3);
else
- mulhr_u(r1, r2, r3);
+ hmulr_u(r1, r2, r3);
if (r0 == r2 || r0 == r3) {
movr(r0, rn(reg));
jit_unget_reg(reg);
diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c
index 2015464..136f1d4 100644
--- a/lib/jit_ppc-sz.c
+++ b/lib/jit_ppc-sz.c
@@ -527,6 +527,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 12, /* hmuli */
+ 4, /* hmulr_u */
+ 12, /* hmuli_u */
#endif /* !_CALL_SYSV */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
@@ -1061,6 +1065,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 12, /* hmuli */
+ 4, /* hmulr_u */
+ 12, /* hmuli_u */
#endif /* _CALL_SYSV */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
@@ -1594,6 +1602,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 24, /* hmuli */
+ 4, /* hmulr_u */
+ 24, /* hmuli_u */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
@@ -2126,6 +2138,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 24, /* hmuli */
+ 4, /* hmulr_u */
+ 24, /* hmuli_u */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
index bddd523..9f98176 100644
--- a/lib/jit_ppc.c
+++ b/lib/jit_ppc.c
@@ -1377,6 +1377,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_riscv-cpu.c b/lib/jit_riscv-cpu.c
index 04174ae..8d6115a 100644
--- a/lib/jit_riscv-cpu.c
+++ b/lib/jit_riscv-cpu.c
@@ -302,6 +302,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define mulr(r0, r1, r2) MUL(r0, r1, r2)
# define muli(r0, r1, im) _muli(_jit, r0, r1, im)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0, r1, r2) MULH(r0, r1, r2)
+# define hmuli(r0, r1, im) _hmuli(_jit, r0, r1, im)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr_u(r0, r1, r2) MULHU(r0, r1, r2)
+# define hmuli_u(r0, r1, im) _hmuli_u(_jit, r0, r1, im)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define divr(r0, r1, r2) DIV(r0, r1, r2)
# define divi(r0, r1, im) _divi(_jit, r0, r1, im)
static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -939,6 +945,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* hmul with an immediate operand: i0 is loaded into a scratch register and hmulr(r0, r1, scratch) is emitted */
+ jit_int32_t t0; /* handle of the scratch register */
+ t0 = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(t0), i0); /* materialize the immediate */
+ hmulr(r0, r1, rn(t0)); /* delegate to the register-register form */
+ jit_unget_reg(t0); /* give the scratch register back */
+}
+
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* _u counterpart of hmuli: i0 is loaded into a scratch register and hmulr_u(r0, r1, scratch) is emitted */
+ jit_int32_t t0; /* handle of the scratch register */
+ t0 = jit_get_reg(jit_class_gpr); /* borrow a general purpose register */
+ movi(rn(t0), i0); /* materialize the immediate */
+ hmulr_u(r0, r1, rn(t0)); /* delegate to the register-register _u form */
+ jit_unget_reg(t0); /* give the scratch register back */
+}
+
+static void
_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t t0;
diff --git a/lib/jit_riscv-sz.c b/lib/jit_riscv-sz.c
index 887f8dc..c08e5bd 100644
--- a/lib/jit_riscv-sz.c
+++ b/lib/jit_riscv-sz.c
@@ -524,4 +524,8 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 4, /* hmulr */
+ 16, /* hmuli */
+ 4, /* hmulr_u */
+ 16, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c
index 2a399a9..27b0c5a 100644
--- a/lib/jit_riscv.c
+++ b/lib/jit_riscv.c
@@ -1149,6 +1149,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c
index 3fe3e07..23f7346 100644
--- a/lib/jit_s390-cpu.c
+++ b/lib/jit_s390-cpu.c
@@ -1023,6 +1023,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2) /* hmul reuses qmul: JIT_NOREG as first destination means that result is not stored */
+# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0) /* immediate form, same JIT_NOREG convention */
+# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2) /* _u variant through qmulr_u */
+# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0) /* _u immediate variant through qmuli_u */
# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
@@ -2765,14 +2769,14 @@ _qmulr(jit_state_t *_jit,
jit_int32_t reg;
/* The only invalid condition is r0 == r1 */
jit_int32_t t2, t3, s2, s3;
- if (r2 == r0 || r2 == r1) {
+ if ((r0 != JIT_NOREG && r2 == r0) || r2 == r1) {
s2 = jit_get_reg(jit_class_gpr);
t2 = rn(s2);
movr(t2, r2);
}
else
t2 = r2;
- if (r3 == r0 || r3 == r1) {
+ if ((r0 != JIT_NOREG && r3 == r0) || r3 == r1) {
s3 = jit_get_reg(jit_class_gpr);
t3 = rn(s3);
movr(t3, r3);
@@ -2815,7 +2819,8 @@ _qmulr_u(jit_state_t *_jit,
regno = jit_get_reg_pair();
movr(rn(regno) + 1, r2);
MULU_(rn(regno), r3);
- movr(r0, rn(regno) + 1);
+ if (r0 != JIT_NOREG)
+ movr(r0, rn(regno) + 1);
movr(r1, rn(regno));
jit_unget_reg_pair(regno);
}
@@ -2829,7 +2834,8 @@ _qmuli_u(jit_state_t *_jit,
movr(rn(regno) + 1, r2);
movi(rn(regno), i0);
MULU_(rn(regno), rn(regno));
- movr(r0, rn(regno) + 1);
+ if (r0 != JIT_NOREG)
+ movr(r0, rn(regno) + 1);
movr(r1, rn(regno));
jit_unget_reg_pair(regno);
}
diff --git a/lib/jit_s390-sz.c b/lib/jit_s390-sz.c
index c71b040..b8a87e8 100644
--- a/lib/jit_s390-sz.c
+++ b/lib/jit_s390-sz.c
@@ -524,6 +524,10 @@
0, /* fnmai_d */
10, /* fnmsr_d */
0, /* fnmsi_d */
+ 34, /* hmulr */
+ 42, /* hmuli */
+ 8, /* hmulr_u */
+ 16, /* hmuli_u */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
@@ -1052,4 +1056,8 @@
0, /* fnmai_d */
10, /* fnmsr_d */
0, /* fnmsi_d */
+ 44, /* hmulr */
+ 60, /* hmuli */
+ 12, /* hmulr_u */
+ 28, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_s390.c b/lib/jit_s390.c
index 858ea30..851d0d0 100644
--- a/lib/jit_s390.c
+++ b/lib/jit_s390.c
@@ -1115,6 +1115,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c
index 3a86f9e..6562867 100644
--- a/lib/jit_sparc-cpu.c
+++ b/lib/jit_sparc-cpu.c
@@ -630,6 +630,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# endif
# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2) /* hmul reuses qmul: JIT_NOREG as first destination means that result is not kept */
+# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0) /* immediate form, same JIT_NOREG convention */
+# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2) /* _u variant through qmulr_u */
+# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0) /* _u immediate variant through qmuli_u */
# if __WORDSIZE == 32
# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1)
# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0)
@@ -1633,6 +1637,8 @@ static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
+ if (r0 == JIT_NOREG)
+ r0 = r1;
if (sign)
SMUL(r2, r3, r0);
else
@@ -1646,6 +1652,8 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
{
jit_int32_t reg;
if (s13_p(i0)) {
+ if (r0 == JIT_NOREG)
+ r0 = r1;
if (sign)
SMULI(r2, i0, r0);
else
@@ -1698,7 +1706,8 @@ _qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
movr(_O0_REGNO, r3);
movr(_O1_REGNO, r2);
calli((jit_word_t)__llmul);
- movr(r0, _O1_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _O1_REGNO);
movr(r1, _O0_REGNO);
QMUL_EPILOG();
}
@@ -1711,7 +1720,8 @@ _qmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
movi(_O0_REGNO, i0);
movr(_O1_REGNO, r2);
calli((jit_word_t)__llmul);
- movr(r0, _O1_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _O1_REGNO);
movr(r1, _O0_REGNO);
QMUL_EPILOG();
}
@@ -1729,7 +1739,8 @@ _qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
movr(_O0_REGNO, r3);
movr(_O1_REGNO, r2);
calli((jit_word_t)__ullmul);
- movr(r0, _O1_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _O1_REGNO);
movr(r1, _O0_REGNO);
QMUL_EPILOG();
}
@@ -1742,7 +1753,8 @@ _qmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
movi(_O0_REGNO, i0);
movr(_O1_REGNO, r2);
calli((jit_word_t)__ullmul);
- movr(r0, _O1_REGNO);
+ if (r0 != JIT_NOREG)
+ movr(r0, _O1_REGNO);
movr(r1, _O0_REGNO);
QMUL_EPILOG();
}
diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c
index a8aeb1c..8a4ce7b 100644
--- a/lib/jit_sparc-sz.c
+++ b/lib/jit_sparc-sz.c
@@ -524,6 +524,10 @@
0, /* fnmai_d */
4, /* fnmsr_d */
0, /* fnmsi_d */
+ 8, /* hmulr */
+ 16, /* hmuli */
+ 8, /* hmulr_u */
+ 16, /* hmuli_u */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
@@ -1052,4 +1056,8 @@
0, /* fnmai_d */
20, /* fnmsr_d */
0, /* fnmsi_d */
+ 44, /* hmulr */
+ 60, /* hmuli */
+ 44, /* hmulr_u */
+ 60, /* hmuli_u */
#endif /* __WORDSIZE */
diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c
index 1acf636..bd8756d 100644
--- a/lib/jit_sparc.c
+++ b/lib/jit_sparc.c
@@ -1409,6 +1409,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index 47529cf..44f5b45 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -256,6 +256,10 @@ static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 1) /* hmul via the qmul helper: JIT_NOREG as first destination, only the second result is copied out */
+# define hmulr_u(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 0) /* same, last argument 0 as in qmulr_u */
+# define hmuli(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 1) /* immediate form through _iqmuli */
+# define hmuli_u(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 0) /* immediate form, last argument 0 */
# define umulr(r0) unr(X86_IMUL, r0)
# define umulr_u(r0) unr(X86_MUL, r0)
# define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1)
@@ -1525,14 +1529,20 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
else
umulr_u(mul);
- if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
- xchgr(_RAX_REGNO, _RDX_REGNO);
+ if (r0 != JIT_NOREG) {
+ if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
+ xchgr(_RAX_REGNO, _RDX_REGNO);
+ else {
+ if (r0 != _RDX_REGNO)
+ movr(r0, _RAX_REGNO);
+ movr(r1, _RDX_REGNO);
+ if (r0 == _RDX_REGNO)
+ movr(r0, _RAX_REGNO);
+ }
+ }
else {
- if (r0 != _RDX_REGNO)
- movr(r0, _RAX_REGNO);
+ assert(r1 != JIT_NOREG);
movr(r1, _RDX_REGNO);
- if (r0 == _RDX_REGNO)
- movr(r0, _RAX_REGNO);
}
clear(_RDX_REGNO, _RDX);
diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c
index a2b608c..99bb625 100644
--- a/lib/jit_x86-sz.c
+++ b/lib/jit_x86-sz.c
@@ -525,6 +525,10 @@
0, /* fnmai_d */
27, /* fnmsr_d */
0, /* fnmsi_d */
+ 18, /* hmulr */
+ 23, /* hmuli */
+ 18, /* hmulr_u */
+ 23, /* hmuli_u */
#endif /* __X32 */
#if __X64
@@ -1054,6 +1058,10 @@
0, /* fnmai_d */
30, /* fnmsr_d */
0, /* fnmsi_d */
+ 17, /* hmulr */
+ 27, /* hmuli */
+ 17, /* hmulr_u */
+ 27, /* hmuli_u */
#else
# if __X64_32
@@ -1582,6 +1590,10 @@
0, /* fnmai_d */
31, /* fnmsr_d */
0, /* fnmsi_d */
+ 15, /* hmulr */
+ 21, /* hmuli */
+ 15, /* hmulr_u */
+ 21, /* hmuli_u */
#else
#define JIT_INSTR_MAX 112
@@ -2109,6 +2121,10 @@
0, /* fnmai_d */
31, /* fnmsr_d */
0, /* fnmsi_d */
+ 17, /* hmulr */
+ 27, /* hmuli */
+ 17, /* hmulr_u */
+ 27, /* hmuli_u */
#endif /* __CYGWIN__ || _WIN32 */
# endif /* __X64_32 */
#endif /* __X64 */
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index fb5f3ca..dd4fccd 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -1798,6 +1798,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsb,);
case_rrr(mul,);
case_rrw(mul,);
+ case_rrr(hmul,);
+ case_rrw(hmul,);
+ case_rrr(hmul, _u);
+ case_rrw(hmul, _u);
case_rrrr(qmul,);
case_rrrw(qmul,);
case_rrrr(qmul, _u);
diff --git a/lib/lightning.c b/lib/lightning.c
index d1d8ffc..25a6078 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -1551,7 +1551,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
case jit_code_addi: case jit_code_addxi: case jit_code_addci:
case jit_code_subi: case jit_code_subxi: case jit_code_subci:
case jit_code_rsbi:
- case jit_code_muli: case jit_code_divi: case jit_code_divi_u:
+ case jit_code_muli: case jit_code_hmuli: case jit_code_hmuli_u:
+ case jit_code_divi: case jit_code_divi_u:
case jit_code_remi: case jit_code_remi_u: case jit_code_andi:
case jit_code_ori: case jit_code_xori: case jit_code_lshi:
case jit_code_rshi: case jit_code_rshi_u: case jit_code_lroti:
@@ -1592,7 +1593,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
break;
case jit_code_addr: case jit_code_addxr: case jit_code_addcr:
case jit_code_subr: case jit_code_subxr: case jit_code_subcr:
- case jit_code_mulr: case jit_code_divr: case jit_code_divr_u:
+ case jit_code_mulr: case jit_code_hmulr: case jit_code_hmulr_u:
+ case jit_code_divr: case jit_code_divr_u:
case jit_code_remr: case jit_code_remr_u: case jit_code_andr:
case jit_code_orr: case jit_code_xorr: case jit_code_lshr:
case jit_code_rshr: case jit_code_rshr_u: case jit_code_lrotr:
generated by cgit v1.2.3 (git 2.39.1) at 2025年10月05日 00:38:25 +0000

AltStyle によって変換されたページ (->オリジナル) /