lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat
-rw-r--r--ChangeLog 29
-rw-r--r--check/Makefile.am 17
-rw-r--r--check/all.tst 5
-rw-r--r--check/alu_rot.ok 1
-rw-r--r--check/alu_rot.tst 39
-rw-r--r--check/lightning.c 6
-rw-r--r--doc/body.texi 8
-rw-r--r--include/lightning.h.in 7
-rw-r--r--lib/jit_aarch64-cpu.c 48
-rw-r--r--lib/jit_aarch64.c 4
-rw-r--r--lib/jit_alpha.c 8
-rw-r--r--lib/jit_arm-cpu.c 58
-rw-r--r--lib/jit_arm.c 4
-rw-r--r--lib/jit_fallback.c 131
-rw-r--r--lib/jit_hppa-cpu.c 26
-rw-r--r--lib/jit_hppa.c 6
-rw-r--r--lib/jit_ia64.c 8
-rw-r--r--lib/jit_loongarch-cpu.c 21
-rw-r--r--lib/jit_loongarch.c 6
-rw-r--r--lib/jit_mips-cpu.c 62
-rw-r--r--lib/jit_mips.c 6
-rw-r--r--lib/jit_names.c 2
-rw-r--r--lib/jit_ppc-cpu.c 55
-rw-r--r--lib/jit_ppc.c 4
-rw-r--r--lib/jit_riscv.c 8
-rw-r--r--lib/jit_s390-cpu.c 23
-rw-r--r--lib/jit_s390.c 4
-rw-r--r--lib/jit_sparc.c 8
-rw-r--r--lib/jit_x86-cpu.c 4
-rw-r--r--lib/jit_x86.c 4
-rw-r--r--lib/lightning.c 6
31 files changed, 582 insertions, 36 deletions
diff --git a/ChangeLog b/ChangeLog
index 6a45bd8..8fb09cf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,32 @@
+2023年03月07日 Paulo Andrade <pcpa@gnu.org>
+
+ * check/alu_rot.tst, check/alu_rot.ok: New test files for the new
+ lrotr, lroti, rrotr and rroti instructions.
+ * check/Makefile.am, check/lightning.c, include/lightning.h.in,
+ lib/jit_names.c, lib/lightning.c, doc/body.texi: Update for the
+ new instructions.
+ * lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, lib/jit_arm-cpu.c,
+ lib/jit_arm.c: Implement optimized rrotr and rroti. lrotr and
+ lroti just adjust parameters for a left shift rotate.
+ * lib/jit_alpha-cpu.c, lib/jit_alpha.c, lib/jit_ia64-cpu.c,
+ lib/jit_ia64.c, lib/jit_riscv-cpu.c, lib/jit_riscv.c,
+ lib/jit_sparc-cpu.c, lib/jit_sparc.c: Implement calls to fallback
+ lrotr, lroti, rrotr and rroti.
+ * lib/jit_hppa-cpu.c, lib/jit_hppa.c: Implement optimized rroti.
+ Other instructions use fallbacks.
+ * lib/jit_loongarch-cpu.c, lib/jit_loongarch.c: Implement optimized
+ rrotr and rroti. lrotr and lroti just adapt arguments and use a
+ right shift.
+ * lib/jit_mips-cpu.c, lib/jit_mips.c: If mips2, implement optimized
+ rrotr and rroti. lrotr and lroti just adapt arguments and use a
+ right shift. If mips1, use fallbacks.
+ * lib/jit_ppc-cpu.c, lib/jit_ppc.c, lib/jit_s390-cpu.c,
+ lib/jit_s390.c, lib/jit_x86-cpu.c, lib/jit_x86.c: Implement
+ optimized lrotr, lroti, rrotr, rroti.
+ * lib/jit_fallback.c: Implement fallbacks for lrotr, lroti,
+ rrotr and rroti. Also add extra macro to avoid segfaults in s390,
+ that cannot use register zero for some addressing instructions.
+
2023年03月02日 Paulo Andrade <pcpa@gnu.org>
* check/popcnt.tst, check/popcnt.ok: New test files for the new
diff --git a/check/Makefile.am b/check/Makefile.am
index f24430e..0a49ac9 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -94,6 +94,7 @@ EXTRA_DIST = \
alu_rsh.tst alu_rsh.ok \
alu_com.tst alu_com.ok \
alu_neg.tst alu_neg.ok \
+ alu_rot.tst alu_rot.ok \
movzr.tst movzr.ok \
fop_abs.tst fop_abs.ok \
fop_sqrt.tst fop_sqrt.ok \
@@ -140,7 +141,7 @@ base_TESTS = \
alu_mul alu_div alu_rem \
alu_and alu_or alu_xor \
alu_lsh alu_rsh \
- alu_com alu_neg \
+ alu_com alu_neg alu_rot \
movzr \
fop_abs fop_sqrt \
varargs stack \
@@ -168,7 +169,7 @@ x87_TESTS = \
alu_sub.x87 alux_sub.x87 alu_rsb.x87 \
alu_mul.x87 alu_div.x87 alu_rem.x87 \
alu_and.x87 alu_or.x87 alu_xor.x87 \
- alu_lsh.x87 alu_rsh.x87 \
+ alu_lsh.x87 alu_rsh.x87 alu_rot.x87 \
alu_com.x87 alu_neg.x87 \
movzr.x87 \
fop_abs.x87 fop_sqrt.x87 \
@@ -192,7 +193,7 @@ x87_nodata_TESTS = \
alu_sub.x87.nodata alux_sub.x87.nodata alu_rsb.x87.nodata \
alu_mul.x87.nodata alu_div.x87.nodata alu_rem.x87.nodata \
alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata \
- alu_lsh.x87.nodata alu_rsh.x87.nodata \
+ alu_lsh.x87.nodata alu_rsh.x87.nodata alu_rot.x87.nodata \
alu_com.x87.nodata alu_neg.x87.nodata \
movzr.x87.nodata \
fop_abs.x87.nodata fop_sqrt.x87.nodata \
@@ -218,7 +219,7 @@ arm_TESTS = \
alu_sub.arm alux_sub.arm alu_rsb.arm \
alu_mul.arm alu_div.arm alu_rem.arm \
alu_and.arm alu_or.arm alu_xor.arm \
- alu_lsh.arm alu_rsh.arm \
+ alu_lsh.arm alu_rsh.arm alu_rot.arm \
alu_com.arm alu_neg.arm \
movzr.arm \
fop_abs.arm fop_sqrt.arm \
@@ -244,7 +245,7 @@ swf_TESTS = \
alu_sub.swf alux_sub.swf alu_rsb.swf \
alu_mul.swf alu_div.swf alu_rem.swf \
alu_and.swf alu_or.swf alu_xor.swf \
- alu_lsh.swf alu_rsh.swf \
+ alu_lsh.swf alu_rsh.swf alu_rot.swf \
alu_com.swf alu_neg.swf \
movzr.swf \
fop_abs.swf fop_sqrt.swf \
@@ -268,7 +269,7 @@ arm_swf_TESTS = \
alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf \
alu_mul.arm.swf alu_div.arm.swf alu_rem.arm.swf \
alu_and.arm.swf alu_or.arm.swf alu_xor.arm.swf \
- alu_lsh.arm.swf alu_rsh.arm.swf \
+ alu_lsh.arm.swf alu_rsh.arm.swf alu_rot.arm.swf \
alu_com.arm.swf alu_neg.arm.swf \
movzr.arm.swf \
fop_abs.arm.swf fop_sqrt.arm.swf \
@@ -293,7 +294,7 @@ arm4_swf_TESTS = \
alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf \
alu_mul.arm4.swf alu_div.arm4.swf alu_rem.arm4.swf \
alu_and.arm4.swf alu_or.arm4.swf alu_xor.arm4.swf \
- alu_lsh.arm4.swf alu_rsh.arm4.swf \
+ alu_lsh.arm4.swf alu_rsh.arm4.swf alu_rot.arm4.swf \
alu_com.arm4.swf alu_neg.arm4.swf \
movzr.arm4.swf \
fop_abs.arm4.swf fop_sqrt.arm4.swf \
@@ -321,7 +322,7 @@ nodata_TESTS = \
alu_sub.nodata alux_sub.nodata alu_rsb.nodata \
alu_mul.nodata alu_div.nodata alu_rem.nodata \
alu_and.nodata alu_or.nodata alu_xor.nodata \
- alu_lsh.nodata alu_rsh.nodata \
+ alu_lsh.nodata alu_rsh.nodata alu_rot.nodata \
alu_com.nodata alu_neg.nodata \
movzr.nodata \
fop_abs.nodata fop_sqrt.nodata \
diff --git a/check/all.tst b/check/all.tst
index d24f7ae..db3b870 100644
--- a/check/all.tst
+++ b/check/all.tst
@@ -88,6 +88,11 @@
clzr %r0 %r1
ctor %r0 %r1
ctzr %r0 %r1
+ popcntr %r0 %r1
+ lrotr %r0 %r1 %r2
+ lroti %r0 %r1 0x1f
+ rrotr %r0 %r1 %r2
+ rroti %r0 %r1 0x1f
ltr %r0 %r1 %r2
lti %r0 %r1 2
ltr_u %r0 %r1 %r2
diff --git a/check/alu_rot.ok b/check/alu_rot.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_rot.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_rot.tst b/check/alu_rot.tst
new file mode 100644
index 0000000..6ab7bf0
--- /dev/null
+++ b/check/alu_rot.tst
@@ -0,0 +1,39 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define LROT(N, I0, I1, V) ALU(N, , lrot, I0, I1, V)
+ LROT(0, 0x7f, 1, 0xfe)
+#if __WORDSIZE == 32
+ LROT(1, 0xfffffffe, 31, 0x7fffffff)
+ LROT(2, 0x12345678, 11, 0xa2b3c091)
+ LROT(3, 0x80000001, 1, 0x03)
+#else
+ LROT(1, 0xfffffffffffffffe, 31, 0xffffffff7fffffff)
+ LROT(2, 0x123456789abcdef0, 43, 0xe6f78091a2b3c4d5)
+ LROT(3, 0x00000001ffffffff, 32, 0xffffffff00000001)
+ LROT(4, 0x80000001, 33, 0x200000001)
+ LROT(5, 0x8000000000, 35, 0x400)
+#endif
+
+#define RROT(N, I0, I1, V) ALU(N, , rrot, I0, I1, V)
+ RROT(0, 0xfe, 1, 0x7f)
+#if __WORDSIZE == 32
+ RROT(1, 0xfffffffe, 31, 0xfffffffd)
+ RROT(2, 0x12345678, 11, 0xcf02468a)
+ RROT(3, 0x80000001, 3, 0x30000000)
+#else
+ RROT(1, 0xfffffffffffffffe, 31, 0xfffffffdffffffff)
+ RROT(2, 0x123456789abcdef0, 43, 0xcf13579bde02468a)
+ RROT(3, 0x00000001ffffffff, 32, 0xffffffff00000001)
+ RROT(4, 0x80000001, 33, 0x4000000080000000)
+ RROT(5, 0x8000000000, 35, 0x10)
+#endif
+
+ prepare
+ pushargi ok
+ ellipsis
+ finishi @printf
+ ret
+ epilog
diff --git a/check/lightning.c b/check/lightning.c
index daa3786..19bf06f 100644
--- a/check/lightning.c
+++ b/check/lightning.c
@@ -320,6 +320,8 @@ static void xorr(void); static void xori(void);
static void lshr(void); static void lshi(void);
static void rshr(void); static void rshi(void);
static void rshr_u(void); static void rshi_u(void);
+static void lrotr(void); static void lroti(void);
+static void rrotr(void); static void rroti(void);
static void negr(void); static void comr(void);
static void clor(void); static void clzr(void);
static void ctor(void); static void ctzr(void);
@@ -682,6 +684,8 @@ static instr_t instr_vector[] = {
entry(lshr), entry(lshi),
entry(rshr), entry(rshi),
entry(rshr_u), entry(rshi_u),
+ entry(lrotr), entry(lroti),
+ entry(rrotr), entry(rroti),
entry(negr), entry(comr),
entry(clor), entry(clzr),
entry(ctor), entry(ctzr),
@@ -1537,6 +1541,8 @@ entry_ir_ir_ir(xorr) entry_ir_ir_im(xori)
entry_ir_ir_ir(lshr) entry_ir_ir_im(lshi)
entry_ir_ir_ir(rshr) entry_ir_ir_im(rshi)
entry_ir_ir_ir(rshr_u) entry_ir_ir_im(rshi_u)
+entry_ir_ir_ir(lrotr) entry_ir_ir_im(lroti)
+entry_ir_ir_ir(rrotr) entry_ir_ir_im(rroti)
entry_ir_ir(negr) entry_ir_ir(comr)
entry_ir_ir(clor) entry_ir_ir(clzr)
entry_ir_ir(ctor) entry_ir_ir(ctzr)
diff --git a/doc/body.texi b/doc/body.texi
index aec96f1..1c6d23e 100644
--- a/doc/body.texi
+++ b/doc/body.texi
@@ -261,10 +261,18 @@ lshr O1 = O2 << O3
lshi O1 = O2 << O3
rshr _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
rshi _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+lrotr O1 = (O2 << O3) | (O2 >> (WORDSIZE - O3))
+lroti O1 = (O2 << O3) | (O2 >> (WORDSIZE - O3))
+rrotr O1 = (O2 >> O3) | (O2 << (WORDSIZE - O3))
+rroti O1 = (O2 >> O3) | (O2 << (WORDSIZE - O3))
movzr O1 = O3 ? O1 : O2
movnr O1 = O3 ? O2 : O1
@end example
+Note that @code{lrotr}, @code{lroti}, @code{rrotr} and @code{rroti}
+are described in terms of their fallback implementation. These are
+bit shift/rotation operations.
+
@item Four operand binary ALU operations
These accept two result registers, and two operands; the last one can
be an immediate. The first two arguments cannot be the same register.
diff --git a/include/lightning.h.in b/include/lightning.h.in
index 1aae759..f0c96e2 100644
--- a/include/lightning.h.in
+++ b/include/lightning.h.in
@@ -1062,6 +1062,13 @@ typedef enum {
#define jit_popcntr(u,v) jit_new_node_ww(jit_code_popcntr,u,v)
jit_code_popcntr,
+#define jit_lrotr(u,v,w) jit_new_node_www(jit_code_lrotr,u,v,w)
+#define jit_lroti(u,v,w) jit_new_node_www(jit_code_lroti,u,v,w)
+ jit_code_lrotr, jit_code_lroti,
+#define jit_rrotr(u,v,w) jit_new_node_www(jit_code_rrotr,u,v,w)
+#define jit_rroti(u,v,w) jit_new_node_www(jit_code_rroti,u,v,w)
+ jit_code_rrotr, jit_code_rroti,
+
jit_code_last_code
} jit_code_t;
diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c
index 99d8756..66aaa5f 100644
--- a/lib/jit_aarch64-cpu.c
+++ b/lib/jit_aarch64-cpu.c
@@ -297,6 +297,8 @@ typedef union {
# define A64_LSL 0x1ac02000
# define A64_LSR 0x1ac02400
# define A64_ASR 0x1ac02800
+# define A64_RORV 0x1ac02c00
+# define A64_EXTR 0x13800000
# define A64_MUL 0x1b007c00
# define A64_SMULL 0x9b207c00
# define A64_SMULH 0x9b407c00
@@ -416,6 +418,9 @@ typedef union {
# define ASRI(r0,r1,i0) SBFM(r0,r1,i0,63)
# define LSR(Rd,Rn,Rm) oxxx(A64_LSR|XS,Rd,Rn,Rm)
# define LSRI(r0,r1,i0) UBFM(r0,r1,i0,63)
+# define RORV(Rd,Rn,Rm) oxxx(A64_RORV|XS,Rd,Rn,Rm)
+# define EXTR(Rd,Rn,Rm,Im) oxxx6(A64_EXTR|XS|DS,Rm,Im,Rn,Rd)
+# define ROR(Rd,Rn,Rm,Im) EXTR(Rd,Rn,Rm,Im)
# define AND(Rd,Rn,Rm) oxxx(A64_AND|XS,Rd,Rn,Rm)
/* actually should use oxxrs but logical_immediate returns proper encoding */
# define ANDI(Rd,Rn,Imm12) oxxi(A64_ANDI|XS,Rd,Rn,Imm12)
@@ -514,6 +519,10 @@ static void _oxxxc(jit_state_t*,jit_int32_t,jit_int32_t,
# define oxxx7(Op,Rt,Rt2,Rn,Simm7) _oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7)
static void _oxxx7(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define oxxx6(Op,Rm,Imm6,Rn,Rd) _oxxx6(_jit,Op,Rm,Imm6,Rn,Rd)
+static void _oxxx6(jit_state_t*,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+
# define nop(i0) _nop(_jit,i0)
static void _nop(jit_state_t*,jit_int32_t);
# define addr(r0,r1,r2) ADD(r0,r1,r2)
@@ -584,6 +593,11 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define rshr_u(r0,r1,r2) LSR(r0,r1,r2)
# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2)
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define lroti(r0,r1,i0) rroti(r0,r1,64-i0)
+# define rrotr(r0,r1,r2) RORV(r0,r1,r2)
+# define rroti(r0,r1,i0) ROR(r0,r1,r1,i0)
# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
@@ -1033,6 +1047,24 @@ _oxxx7(jit_state_t *_jit, jit_int32_t Op,
}
static void
+_oxxx6(jit_state_t *_jit, jit_int32_t Op,
+ jit_int32_t Rm, jit_int32_t Imm6, jit_int32_t Rn, jit_int32_t Rd)
+{
+ instr_t i;
+ assert(!(Rm & ~0x1f));
+ assert(!(Rn & ~0x1f));
+ assert(!(Rd & ~0x1f));
+ assert(Imm6 >= 0 && Imm6 <= 63);
+ assert(!(Op & ~0xffe0fc00));
+ i.w = Op;
+ i.Rm.b = Rm;
+ i.imm6.b = Imm6;
+ i.Rn.b = Rn;
+ i.Rd.b = Rd;
+ ii(i.w);
+}
+
+static void
_nop(jit_state_t *_jit, jit_int32_t i0)
{
for (; i0 > 0; i0 -= 4)
@@ -1381,6 +1413,22 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_int32_t reg;
+ if (r0 != r1 && r0 != r2) {
+ rsbi(r0, r2, 64);
+ rrotr(r0, r1, r0);
+ }
+ else {
+ reg = jit_get_reg(jit_class_gpr);
+ rsbi(rn(reg), r2, 64);
+ rrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
+}
+
+static void
_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
CMPI(r2, 0);
diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c
index f3fe712..b545bfe 100644
--- a/lib/jit_aarch64.c
+++ b/lib/jit_aarch64.c
@@ -1442,6 +1442,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(neg,);
case_rr(com,);
case_rr(clo,);
diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c
index dd20661..b87719b 100644
--- a/lib/jit_alpha.c
+++ b/lib/jit_alpha.c
@@ -1035,6 +1035,14 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2)
+#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0)
+#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2)
+#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0)
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rrr(and,);
case_rrw(and,);
case_rrr(or,);
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index c75c13f..c450679 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -224,6 +224,9 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned);
# define THUMB_ASRI 0x1000
# define THUMB2_ASRI 0xea4f0020
# define ARM_ROR 0x00000060
+# define THUMB_ROR 0x41c0
+# define THUMB2_ROR 0xfa60f000
+# define THUMB2_RORI 0xea4f0030
# define ARM_CMP 0x01500000
# define THUMB_CMP 0x4280
# define THUMB_CMPX 0x4500
@@ -637,6 +640,13 @@ static void _tdmb(jit_state_t *_jit, int im);
# define ASRI(rd,rn,im) CC_ASRI(ARM_CC_AL,rd,rn,im)
# define T1_ASRI(rd,rm,im) is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
# define T2_ASRI(rd,rm,im) tshift(THUMB2_ASRI,rd,rm,im)
+# define CC_ROR(cc,rd,rn,rm) CC_SHIFT(cc,ARM_ROR|ARM_R,rd,rm,rn,0)
+# define ROR(rd,rn,rm) CC_ROR(ARM_CC_AL,rd,rn,rm)
+# define T1_ROR(rdn,rm) is(THUMB_ROR|(_u3(rm)<<3)|_u3(rdn))
+# define T2_ROR(rd,rn,rm) torrr(THUMB2_ROR,rn,rd,rm)
+# define CC_RORI(cc,rd,rn,im) CC_SHIFT(cc,ARM_ROR,rd,0,rn,im)
+# define RORI(rd,rn,im) CC_RORI(ARM_CC_AL,rd,rn,im)
+# define T2_RORI(rd,rm,im) tshift(THUMB2_RORI,rd,rm,im)
# define CC_CMP(cc,rn,rm) corrr(cc,ARM_CMP,rn,0,rm)
# define CMP(rn,rm) CC_CMP(ARM_CC_AL,rn,rm)
# define T1_CMP(rn,rm) is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn))
@@ -1010,6 +1020,13 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2)
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define lroti(r0,r1,i0) rroti(r0,r1,32-i0)
+# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0)
+static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define ccr(ct,cf,r0,r1,r2) _ccr(_jit,ct,cf,r0,r1,r2)
static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
# define cci(ct,cf,r0,r1,i0) _cci(_jit,ct,cf,r0,r1,i0)
@@ -2722,6 +2739,47 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* left rotate of r1 by r2 into r0, emitted as a right rotate by (32 - r2) */
+ jit_int32_t reg; /* temporary, only needed when r0 aliases r1 or r2 */
+ if (r0 != r1 && r0 != r2) {
+ rsbi(r0, r2, 32); /* r0 = 32 - r2; word size is 32, matching lroti */
+ rrotr(r0, r1, r0);
+ }
+ else {
+ reg = jit_get_reg(jit_class_gpr);
+ rsbi(rn(reg), r2, 32); /* build the count in a temporary, r0 is an operand */
+ rrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
+}
+
+static void
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ if (jit_thumb_p()) {
+ if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
+ T1_ROR(r0, r2);
+ else
+ T2_ROR(r0, r1, r2);
+ }
+ else
+ ROR(r0, r1, r2);
+}
+
+static void
+_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ assert(i0 >= 0 && i0 <= 31);
+ if (i0 == 0)
+ movr(r0, r1);
+ else if (jit_thumb_p())
+ T2_RORI(r0, r1, i0);
+ else
+ RORI(r0, r1, i0);
+}
+
+static void
_ccr(jit_state_t *_jit, int ct, int cf,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index e64230f..953298f 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -1580,6 +1580,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(neg,);
case_rr(com,);
case_rr(clo,);
diff --git a/lib/jit_fallback.c b/lib/jit_fallback.c
index 55bf7d3..bfc547b 100644
--- a/lib/jit_fallback.c
+++ b/lib/jit_fallback.c
@@ -27,6 +27,19 @@ static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t);
static void _fallback_bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
#define fallback_popcnt(r0,r1) _fallback_popcnt(_jit, r0, r1)
static void _fallback_popcnt(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_lrotr(r0, r1, r2) _fallback_lrotr(_jit, r0, r1, r2)
+static void _fallback_lrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
+#define fallback_lroti(r0, r1, i0) _fallback_lroti(_jit, r0, r1, i0)
+static void _fallback_lroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
+#define fallback_rrotr(r0, r1, r2) _fallback_rrotr(_jit, r0, r1, r2)
+static void _fallback_rrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
+#define fallback_rroti(r0, r1, i0) _fallback_rroti(_jit, r0, r1, i0)
+static void _fallback_rroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
+# if defined(__s390__) || defined(__s390x__)
+# define fallback_jit_get_reg(flags) jit_get_reg_but_zero(flags)
+# else
+# define fallback_jit_get_reg(flags) jit_get_reg(flags)
+# endif
# if defined(__ia64__)
# define fallback_flush() sync()
# elif defined(__mips__)
@@ -198,7 +211,7 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
/* XXX only attempts to fallback cas for lightning jit code */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
if ((iscasi = r1 == _NOREG)) {
- r1_reg = jit_get_reg(jit_class_gpr|jit_class_sav);
+ r1_reg = fallback_jit_get_reg(jit_class_gpr|jit_class_sav);
r1 = rn(r1_reg);
movi(r1, i0);
}
@@ -273,8 +286,8 @@ _fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
};
jit_int32_t t0, t1;
jit_word_t loop, done;
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
+ t0 = fallback_jit_get_reg(jit_class_gpr);
+ t1 = fallback_jit_get_reg(jit_class_gpr);
movi(rn(t0), __WORDSIZE - 8);
fallback_flush();
loop = _jit->pc.w;
@@ -299,9 +312,9 @@ _fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
clz = fallback_jmpi(_jit->pc.w);
fallback_flush();
fallback_patch_bnei(l32, _jit->pc.w);
- r2_reg = jit_get_reg(jit_class_gpr);
+ r2_reg = fallback_jit_get_reg(jit_class_gpr);
r2 = rn(r2_reg);
- r1_reg = jit_get_reg(jit_class_gpr);
+ r1_reg = fallback_jit_get_reg(jit_class_gpr);
movr(rn(r1_reg), r1);
r1 = rn(r1_reg);
movi(r0, 0);
@@ -392,7 +405,7 @@ _fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
};
/* return mod67[(-r1 & r1) % 67]; */
# endif
- t0 = jit_get_reg(jit_class_gpr);
+ t0 = fallback_jit_get_reg(jit_class_gpr);
if (r0 == r1) {
negr(rn(t0), r1);
andr(r0, rn(t0), r1);
@@ -418,9 +431,9 @@ _fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
ctz = fallback_jmpi(_jit->pc.w);
fallback_flush();
fallback_patch_bnei(l32, _jit->pc.w);
- r2_reg = jit_get_reg(jit_class_gpr);
+ r2_reg = fallback_jit_get_reg(jit_class_gpr);
r2 = rn(r2_reg);
- r1_reg = jit_get_reg(jit_class_gpr);
+ r1_reg = fallback_jit_get_reg(jit_class_gpr);
movr(rn(r1_reg), r1);
r1 = rn(r1_reg);
movi(r0, 0);
@@ -524,16 +537,16 @@ _fallback_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
31, 159, 95, 223, 63, 191, 127, 255
};
if (r0 == r1) {
- t0 = jit_get_reg(jit_class_gpr);
+ t0 = fallback_jit_get_reg(jit_class_gpr);
r1_reg = rn(t0);
}
else {
t0 = JIT_NOREG;
r1_reg = r1;
}
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- t3 = jit_get_reg(jit_class_gpr);
+ t1 = fallback_jit_get_reg(jit_class_gpr);
+ t2 = fallback_jit_get_reg(jit_class_gpr);
+ t3 = fallback_jit_get_reg(jit_class_gpr);
if (r0 == r1)
movr(rn(t0), r1);
extr_uc(rn(t1), r1_reg);
@@ -572,9 +585,9 @@ v = ( v >> 16 ) | ( v << 16);
*/
jit_int32_t t0, t1, t2, t3, t4;
movr(r0, r1);
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
+ t0 = fallback_jit_get_reg(jit_class_gpr);
+ t1 = fallback_jit_get_reg(jit_class_gpr);
+ t2 = fallback_jit_get_reg(jit_class_gpr);
movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
@@ -631,20 +644,20 @@ while ((s >>= 1) > 0)
jit_int32_t s, mask;
jit_word_t loop, done, t0, t1;
movr(v, r1);
- s = jit_get_reg(jit_class_gpr);
+ s = fallback_jit_get_reg(jit_class_gpr);
movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */
- mask = jit_get_reg(jit_class_gpr);
+ mask = fallback_jit_get_reg(jit_class_gpr);
movi(rn(mask), ~0L); /* mask = ~0; */
flush();
loop = _jit->pc.w; /* while ((s >>= 1) > 0) */
rshi(rn(s), rn(s), 1); /* (s >>= 1) */
done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */
- t0 = jit_get_reg(jit_class_gpr);
+ t0 = fallback_jit_get_reg(jit_class_gpr);
lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */
xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */
rshr(rn(t0), v, rn(s)); /* t0 = v >> s */
andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */
- t1 = jit_get_reg(jit_class_gpr);
+ t1 = fallback_jit_get_reg(jit_class_gpr);
lshr(rn(t1), v, rn(s)); /* t1 = v << s */
comr(v, rn(mask)); /* v = ~mask */
andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */
@@ -690,16 +703,16 @@ _fallback_popcnt(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
};
if (r0 == r1) {
- t0 = jit_get_reg(jit_class_gpr);
+ t0 = fallback_jit_get_reg(jit_class_gpr);
r1_reg = rn(t0);
}
else {
t0 = JIT_NOREG;
r1_reg = r1;
}
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- t3 = jit_get_reg(jit_class_gpr);
+ t1 = fallback_jit_get_reg(jit_class_gpr);
+ t2 = fallback_jit_get_reg(jit_class_gpr);
+ t3 = fallback_jit_get_reg(jit_class_gpr);
if (r0 == r1)
movr(rn(t0), r1);
extr_uc(rn(t1), r1_reg);
@@ -720,4 +733,76 @@ _fallback_popcnt(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
if (t0 != JIT_NOREG)
jit_unget_reg(t0);
}
+
+static void
+_fallback_lrotr(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ /* r0 = (r1 << r2) | (r1 >> (__WORDSIZE - r2)) */
+ jit_int32_t t0, t1;
+ t0 = fallback_jit_get_reg(jit_class_gpr);
+ if (r0 == r1 || r0 == r2) {
+ t1 = fallback_jit_get_reg(jit_class_gpr);
+ lshr(rn(t0), r1, r2);
+ rsbi(rn(t1), r2, __WORDSIZE);
+ rshr_u(rn(t1), r1, rn(t1));
+ orr(r0, rn(t0), rn(t1));
+ jit_unget_reg(t1);
+ }
+ else {
+ lshr(r0, r1, r2);
+ rsbi(rn(t0), r2, __WORDSIZE);
+ rshr_u(rn(t0), r1, rn(t0));
+ orr(r0, r0, rn(t0));
+ }
+ jit_unget_reg(t0);
+}
+
+static void
+_fallback_lroti(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* r0 = (r1 << i0) | (r1 >> (__WORDSIZE - i0)) */
+ jit_int32_t t0;
+ t0 = fallback_jit_get_reg(jit_class_gpr);
+ lshi(rn(t0), r1, i0); /* t0 = r1 << i0, computed before r0 is written */
+ rshi_u(r0, r1, __WORDSIZE - i0); /* r0 = r1 >> (__WORDSIZE - i0), unsigned shift */
+ orr(r0, r0, rn(t0)); /* merge both halves */
+ jit_unget_reg(t0);
+}
+
+static void
+_fallback_rrotr(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ /* r0 = (r1 >> r2) | (r1 << (__WORDSIZE - r2)) */
+ jit_int32_t t0, t1;
+ t0 = fallback_jit_get_reg(jit_class_gpr);
+ if (r0 == r1 || r0 == r2) {
+ t1 = fallback_jit_get_reg(jit_class_gpr);
+ rshr_u(rn(t0), r1, r2);
+ rsbi(rn(t1), r2, __WORDSIZE);
+ lshr(rn(t1), r1, rn(t1));
+ orr(r0, rn(t0), rn(t1));
+ jit_unget_reg(t1);
+ }
+ else {
+ rshr_u(r0, r1, r2);
+ rsbi(rn(t0), r2, __WORDSIZE);
+ lshr(rn(t0), r1, rn(t0));
+ orr(r0, r0, rn(t0));
+ }
+ jit_unget_reg(t0);
+}
+
+static void
+_fallback_rroti(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* r0 = (r1 >> i0) | (r1 << (__WORDSIZE - i0)) */
+ jit_int32_t t0;
+ t0 = fallback_jit_get_reg(jit_class_gpr);
+ rshi_u(rn(t0), r1, i0); /* t0 = r1 >> i0, unsigned shift, before r0 is written */
+ lshi(r0, r1, __WORDSIZE - i0); /* r0 = r1 << (__WORDSIZE - i0) */
+ orr(r0, r0, rn(t0)); /* merge both halves */
+ jit_unget_reg(t0);
+}
#endif
diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c
index 3ddf725..fb02efe 100644
--- a/lib/jit_hppa-cpu.c
+++ b/lib/jit_hppa-cpu.c
@@ -756,6 +756,13 @@ static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2)
static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define rshi_u(r0,r1,i0) SHRWI_U(r1,i0,r0)
+#define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2)
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lroti(r0,r1,i0) rroti(r0,r1,32-i0)
+#define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0)
+static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
#define cmpr(c,r0,r1,r2) _cmpr(_jit,c,r0,r1,r2)
static void _cmpr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,jit_int32_t);
#define cmpi(c,ci,r0,r1,i0) _cmpi(_jit,c,ci,r0,r1,i0)
@@ -2130,6 +2137,25 @@ _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
}
static void
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+ fallback_lrotr(r0, r1, r2);
+}
+
+static void
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+ fallback_rrotr(r0, r1, r2);
+}
+
+static void
+_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_word_t i0)
+{
+ movr(r0, r1);
+ SHRPWI(r0, r0, i0, r0);
+}
+
+static void
_cmpr(jit_state_t *_jit, jit_word_t c,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c
index 8dca952..995d4b1 100644
--- a/lib/jit_hppa.c
+++ b/lib/jit_hppa.c
@@ -1038,6 +1038,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rrr(movn,);
case_rrr(movz,);
case jit_code_casr:
@@ -1073,7 +1077,7 @@ _emit_code(jit_state_t *_jit)
#define ctor(r0, r1) fallback_cto(r0, r1)
#define ctzr(r0, r1) fallback_ctz(r0, r1)
#define rbitr(r0, r1) fallback_bitswap(r0, r1)
-#define popcntr(r0, r1) fallback_popcntr(r0, r1)
+#define popcntr(r0, r1) fallback_popcnt(r0, r1)
case_rr(clo,);
case_rr(clz,);
case_rr(cto,);
diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c
index 34592c9..855ac44 100644
--- a/lib/jit_ia64.c
+++ b/lib/jit_ia64.c
@@ -1191,6 +1191,14 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2)
+#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0)
+#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2)
+#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0)
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(neg,);
case_rr(com,);
case_rr(clo,);
diff --git a/lib/jit_loongarch-cpu.c b/lib/jit_loongarch-cpu.c
index 2de50c4..aceff17 100644
--- a/lib/jit_loongarch-cpu.c
+++ b/lib/jit_loongarch-cpu.c
@@ -420,6 +420,11 @@ static void _remi_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define rshi(r0, r1, i0) SRAI_D(r0, r1, i0)
# define rshr_u(r0, r1, r2) SRL_D(r0, r1, r2)
# define rshi_u(r0, r1, i0) SRLI_D(r0, r1, i0)
+# define lrotr(r0, r1, r2) _lrotr(_jit, r0, r1, r2)
+static void _lrotr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+# define lroti(r0, r1, i0) rroti(r0, r1, __WORDSIZE - i0)
+# define rrotr(r0, r1, r2) ROTR_D(r0, r1, r2)
+# define rroti(r0, r1, i0) ROTRI_D(r0, r1, i0)
# define andr(r0, r1, r2) AND(r0, r1, r2)
# define andi(r0, r1, i0) _andi(_jit, r0, r1, i0)
static void _andi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
@@ -1331,6 +1336,22 @@ _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_int32_t reg;
+ if (r0 != r1 && r0 != r2) {
+ rsbi(r0, r2, __WORDSIZE);
+ rrotr(r0, r1, r0);
+ }
+ else {
+ reg = jit_get_reg(jit_class_gpr);
+ rsbi(rn(reg), r2, __WORDSIZE);
+ rrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
+}
+
+static void
_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
diff --git a/lib/jit_loongarch.c b/lib/jit_loongarch.c
index 0718dfa..09fa541 100644
--- a/lib/jit_loongarch.c
+++ b/lib/jit_loongarch.c
@@ -1111,6 +1111,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(neg,);
case_rr(com,);
case_rr(clo,);
@@ -1118,7 +1122,7 @@ _emit_code(jit_state_t *_jit)
case_rr(cto,);
case_rr(ctz,);
case_rr(rbit,);
-#define popcntr(r0, r1) fallback_popcntr(r0, r1)
+#define popcntr(r0, r1) fallback_popcnt(r0, r1)
case_rr(popcnt,);
case_rrr(and,);
case_rrw(and,);
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 19d34a2..69f7ed0 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -413,7 +413,10 @@ static void _nop(jit_state_t*,jit_int32_t);
# define DEXTU(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos-32,MIPS_DEXTU)
# define DEXTM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-32-1,pos,MIPS_DEXTM)
# define ROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL)
+# define ROTRV(rd,rt,rs) hrrrit(MIPS_SPECIAL,rs,rt,rd,1,MIPS_SRLV)
# define DROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL)
+# define DROTR32(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL32)
+# define DROTRV(rd,rt,rs) hrrrit(MIPS_SPECIAL,rs,rt,rd,1,MIPS_DSRLV)
# define SYNC() rrr_t(_ZERO_REGNO,_ZERO_REGNO,_ZERO_REGNO,MIPS_SYNC)
# define MFHI(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI)
# define MFLO(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFLO)
@@ -620,6 +623,13 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# endif
+# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2)
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define lroti(r0,r1,i0) rroti(r0,r1,__WORDSIZE-i0)
+# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0)
+static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define andr(r0,r1,r2) AND(r0,r1,r2)
# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0)
static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -2199,6 +2209,58 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
#endif
static void /* _lrotr (MIPS): emit "r0 = r1 rotated left by the count in r2" */
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_int32_t reg; /* scratch register handle, only used when r0 aliases an input */
+ if (jit_mips2_p()) { /* native path: express the left rotate through rrotr */
+ if (r0 != r1 && r0 != r2) { /* r0 overlaps no input, so it can hold the adjusted count */
+ rsbi(r0, r2, __WORDSIZE); /* presumably r0 = __WORDSIZE - r2 (reverse subtract) -- confirm */
+ rrotr(r0, r1, r0);
+ }
+ else { /* r0 aliases r1 or r2: build the adjusted count in a temporary instead */
+ reg = jit_get_reg(jit_class_gpr);
+ rsbi(rn(reg), r2, __WORDSIZE);
+ rrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg); /* release the temporary acquired above */
+ }
+ }
+ else /* jit_mips2_p() is false: defer to fallback_lrotr */
+ fallback_lrotr(r0, r1, r2);
+}
+
+static void /* _rrotr (MIPS): emit "r0 = r1 rotated right by the count in r2" */
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ if (jit_mips2_p()) { /* native path: a single variable-count rotate instruction */
+#if __WORDSIZE == 32
+ ROTRV(r0, r1, r2);
+#else /* __WORDSIZE != 32 */
+ DROTRV(r0, r1, r2);
+#endif
+ }
+ else /* jit_mips2_p() is false: defer to fallback_rrotr */
+ fallback_rrotr(r0, r1, r2);
+}
+
+static void /* _rroti (MIPS): emit "r0 = r1 rotated right by the constant i0" */
+_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ assert(i0 >= 0 && i0 <= __WORDSIZE - 1); /* NOTE(review): lroti expands to rroti(r0,r1,__WORDSIZE-i0), so a zero left-rotate count arrives here as __WORDSIZE -- confirm callers filter it */
+ if (jit_mips2_p()) { /* native path: immediate-count rotate */
+#if __WORDSIZE == 32
+ ROTR(r0, r1, i0);
+#else /* __WORDSIZE != 32 */
+ if (i0 < 32) /* counts 0..31 are passed to DROTR unchanged */
+ DROTR(r0, r1, i0);
+ else /* counts 32 and above go through DROTR32 with the count reduced by 32 */
+ DROTR32(r0, r1, i0 - 32);
+#endif
+ }
+ else /* jit_mips2_p() is false: use the RIGHT-rotate fallback (was fallback_lroti, which rotated the wrong way) */
+ fallback_rroti(r0, r1, i0);
+}
+
+static void
_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index e5ae9e5..aca8fa3 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1560,6 +1560,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rrr(and,);
case_rrw(and,);
case_rrr(or,);
@@ -1677,7 +1681,7 @@ _emit_code(jit_state_t *_jit)
case_rr(cto,);
case_rr(ctz,);
case_rr(rbit,);
-#define popcntr(r0, r1) fallback_popcntr(r0, r1)
+#define popcntr(r0, r1) fallback_popcnt(r0, r1)
case_rr(popcnt,);
case_rrr(lt,);
case_rrw(lt,);
diff --git a/lib/jit_names.c b/lib/jit_names.c
index 7d7a8a8..da19f5f 100644
--- a/lib/jit_names.c
+++ b/lib/jit_names.c
@@ -257,4 +257,6 @@ static char *code_name[] = {
"ctor", "ctzr",
"rbitr",
"popcntr",
+ "lrotr", "lroti",
+ "rrotr", "rroti",
};
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
index f18bb75..4de46b3 100644
--- a/lib/jit_ppc-cpu.c
+++ b/lib/jit_ppc-cpu.c
@@ -669,6 +669,17 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# endif
# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# if __WORDSIZE == 32
+# define lrotr(r0,r1,r2) ROTLW(r0,r1,r2)
+# else
+# define lrotr(r0,r1,r2) ROTLD(r0,r1,r2)
+# endif
+# define lroti(r0,r1,i0) _lroti(_jit,r0,r1,i0)
+static void _lroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0)
+static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define ltr(r0,r1,r2) _ltr(_jit,r0,r1,r2)
static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define lti(r0,r1,i0) _lti(_jit,r0,r1,i0)
@@ -1689,6 +1700,50 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void /* _lroti (PPC): emit "r0 = r1 rotated left by the constant i0" */
+_lroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ if (i0 == 0) /* zero count: emit a plain register move instead of a rotate */
+ movr(r0, r1);
+ else {
+# if __WORDSIZE == 32
+ ROTLWI(r0, r1, i0);
+# else
+ RLDICL(r0, r1, i0, 0); /* left rotate by i0; the trailing 0 is presumably the mask-begin bit (no bits cleared) -- confirm */
+# endif
+ }
+}
+
+static void /* _rrotr (PPC): emit "r0 = r1 rotated right by the count in r2" via lrotr */
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_int32_t reg; /* scratch register handle, only used when r0 aliases an input */
+ if (r0 != r1 && r0 != r2) { /* r0 overlaps no input, so it can hold the adjusted count */
+ rsbi(r0, r2, __WORDSIZE); /* presumably r0 = __WORDSIZE - r2 (reverse subtract) -- confirm */
+ lrotr(r0, r1, r0); /* lrotr is the ROTLW/ROTLD macro in this file */
+ }
+ else { /* r0 aliases r1 or r2: build the adjusted count in a temporary instead */
+ reg = jit_get_reg(jit_class_gpr);
+ rsbi(rn(reg), r2, __WORDSIZE);
+ lrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg); /* release the temporary acquired above */
+ }
+}
+
+static void /* _rroti (PPC): emit "r0 = r1 rotated right by the constant i0" */
+_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ if (i0 == 0) /* zero count: emit a plain register move instead of a rotate */
+ movr(r0, r1);
+ else {
+# if __WORDSIZE == 32
+ ROTRWI(r0, r1, i0);
+# else
+ RLDICL(r0, r1, 64 - i0, 0); /* right rotate by i0 written as a left rotate by 64 - i0 */
+# endif
+ }
+}
+
+static void
_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
CMPX(r1, r2);
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
index fca1b47..23ac66e 100644
--- a/lib/jit_ppc.c
+++ b/lib/jit_ppc.c
@@ -1351,6 +1351,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _s);
diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c
index ce27737..f6e5f4d 100644
--- a/lib/jit_riscv.c
+++ b/lib/jit_riscv.c
@@ -1162,6 +1162,14 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2)
+#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0)
+#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2)
+#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0)
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(neg,);
case_rr(com,);
#define clor(r0, r1) fallback_clo(r0, r1)
diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c
index 0718938..b49940b 100644
--- a/lib/jit_s390-cpu.c
+++ b/lib/jit_s390-cpu.c
@@ -1078,9 +1078,16 @@ static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# endif
# if __WORDSIZE == 32
# define negr(r0,r1) LCR(r0,r1)
+# define lrotr(r0,r1,r2) RLL(r0,r1,0,r2)
+# define lroti(r0,r1,i0) RLL(r0,r1,i0,0)
# else
# define negr(r0,r1) LCGR(r0,r1)
+# define lrotr(r0,r1,r2) RLLG(r0,r1,0,r2)
+# define lroti(r0,r1,i0) RLLG(r0,r1,i0,0)
# endif
+# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define rroti(r0,r1,i0) lroti(r0,r1,__WORDSIZE-i0)
# define clor(r0, r1) _clor(_jit, r0, r1)
static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
# define clzr(r0, r1) _clzr(_jit, r0, r1)
@@ -2992,6 +2999,22 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
#endif
static void /* _rrotr (s390): emit "r0 = r1 rotated right by the count in r2" via lrotr */
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_int32_t reg; /* scratch register handle, only used when r0 aliases an input */
+ if (r0 != r1 && r0 != r2) { /* r0 overlaps no input, so it can hold the adjusted count */
+ rsbi(r0, r2, __WORDSIZE); /* presumably r0 = __WORDSIZE - r2 (reverse subtract) -- confirm */
+ lrotr(r0, r1, r0); /* lrotr is the RLL/RLLG macro in this file */
+ }
+ else { /* r0 aliases r1 or r2: build the adjusted count in a temporary instead */
+ reg = jit_get_reg(jit_class_gpr);
+ rsbi(rn(reg), r2, __WORDSIZE);
+ lrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg); /* release the temporary acquired above */
+ }
+}
+
+static void
_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
#if CHECK_FLOGR
diff --git a/lib/jit_s390.c b/lib/jit_s390.c
index 0e5b356..df89882 100644
--- a/lib/jit_s390.c
+++ b/lib/jit_s390.c
@@ -1131,6 +1131,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(neg,);
case_rr(com,);
case_rr(clo,);
diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c
index a60400d..18a3c4e 100644
--- a/lib/jit_sparc.c
+++ b/lib/jit_sparc.c
@@ -1431,6 +1431,14 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2)
+#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0)
+#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2)
+#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0)
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(trunc, _f_i);
case_rr(trunc, _d_i);
#if __WORDSIZE == 64
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index 4d9e529..6ba594e 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -289,6 +289,10 @@ static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define rshi(r0, r1, i0) rotshi(X86_SAR, r0, r1, i0)
# define rshr_u(r0, r1, r2) rotshr(X86_SHR, r0, r1, r2)
# define rshi_u(r0, r1, i0) rotshi(X86_SHR, r0, r1, i0)
+# define lrotr(r0, r1, r2) rotshr(X86_ROL, r0, r1, r2)
+# define lroti(r0, r1, i0) rotshi(X86_ROL, r0, r1, i0)
+# define rrotr(r0, r1, r2) rotshr(X86_ROR, r0, r1, r2)
+# define rroti(r0, r1, i0) rotshi(X86_ROR, r0, r1, i0)
# define unr(code, r0) _unr(_jit, code, r0)
static void _unr(jit_state_t*, jit_int32_t, jit_int32_t);
# define inegr(r0) unr(X86_NEG, r0)
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index 334a11e..1cb0356 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -1803,6 +1803,10 @@ _emit_code(jit_state_t *_jit)
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(lrot,);
+ case_rrw(lrot,);
+ case_rrr(rrot,);
+ case_rrw(rrot,);
case_rr(neg,);
case_rr(com,);
case_rr(clo,);
diff --git a/lib/lightning.c b/lib/lightning.c
index b2eb9ab..f0ab28a 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -1487,7 +1487,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
case jit_code_muli: case jit_code_divi: case jit_code_divi_u:
case jit_code_remi: case jit_code_remi_u: case jit_code_andi:
case jit_code_ori: case jit_code_xori: case jit_code_lshi:
- case jit_code_rshi: case jit_code_rshi_u: case jit_code_lti:
+ case jit_code_rshi: case jit_code_rshi_u: case jit_code_lroti:
+ case jit_code_rroti: case jit_code_lti:
case jit_code_lti_u: case jit_code_lei: case jit_code_lei_u:
case jit_code_eqi: case jit_code_gei: case jit_code_gei_u:
case jit_code_gti: case jit_code_gti_u: case jit_code_nei:
@@ -1524,7 +1525,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
case jit_code_mulr: case jit_code_divr: case jit_code_divr_u:
case jit_code_remr: case jit_code_remr_u: case jit_code_andr:
case jit_code_orr: case jit_code_xorr: case jit_code_lshr:
- case jit_code_rshr: case jit_code_rshr_u: case jit_code_ltr:
+ case jit_code_rshr: case jit_code_rshr_u: case jit_code_lrotr:
+ case jit_code_rrotr: case jit_code_ltr:
case jit_code_ltr_u: case jit_code_ler: case jit_code_ler_u:
case jit_code_eqr: case jit_code_ger: case jit_code_ger_u:
case jit_code_gtr: case jit_code_gtr_u: case jit_code_ner:
generated by cgit v1.2.3 (git 2.39.1) at 2025-09-15 16:27:36 +0000

AltStyle によって変換されたページ (->オリジナル) /