author | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023年03月07日 18:05:11 -0300 |
---|---|---|
committer | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023年03月07日 18:05:11 -0300 |
commit | a0d09a9548c613aa8a3ef90d362cf2f5e5f97a6f (patch) | |
tree | 07955115b1818a18a063e99692d7ffe79caf6870 | |
parent | 87139e0f6c0c24db1458f5b7aca25f13bc4b6ac6 (diff) | |
download | lightning-a0d09a9548c613aa8a3ef90d362cf2f5e5f97a6f.tar.gz |
-rw-r--r-- | ChangeLog | 29 | ||||
-rw-r--r-- | check/Makefile.am | 17 | ||||
-rw-r--r-- | check/all.tst | 5 | ||||
-rw-r--r-- | check/alu_rot.ok | 1 | ||||
-rw-r--r-- | check/alu_rot.tst | 39 | ||||
-rw-r--r-- | check/lightning.c | 6 | ||||
-rw-r--r-- | doc/body.texi | 8 | ||||
-rw-r--r-- | include/lightning.h.in | 7 | ||||
-rw-r--r-- | lib/jit_aarch64-cpu.c | 48 | ||||
-rw-r--r-- | lib/jit_aarch64.c | 4 | ||||
-rw-r--r-- | lib/jit_alpha.c | 8 | ||||
-rw-r--r-- | lib/jit_arm-cpu.c | 58 | ||||
-rw-r--r-- | lib/jit_arm.c | 4 | ||||
-rw-r--r-- | lib/jit_fallback.c | 131 | ||||
-rw-r--r-- | lib/jit_hppa-cpu.c | 26 | ||||
-rw-r--r-- | lib/jit_hppa.c | 6 | ||||
-rw-r--r-- | lib/jit_ia64.c | 8 | ||||
-rw-r--r-- | lib/jit_loongarch-cpu.c | 21 | ||||
-rw-r--r-- | lib/jit_loongarch.c | 6 | ||||
-rw-r--r-- | lib/jit_mips-cpu.c | 62 | ||||
-rw-r--r-- | lib/jit_mips.c | 6 | ||||
-rw-r--r-- | lib/jit_names.c | 2 | ||||
-rw-r--r-- | lib/jit_ppc-cpu.c | 55 | ||||
-rw-r--r-- | lib/jit_ppc.c | 4 | ||||
-rw-r--r-- | lib/jit_riscv.c | 8 | ||||
-rw-r--r-- | lib/jit_s390-cpu.c | 23 | ||||
-rw-r--r-- | lib/jit_s390.c | 4 | ||||
-rw-r--r-- | lib/jit_sparc.c | 8 | ||||
-rw-r--r-- | lib/jit_x86-cpu.c | 4 | ||||
-rw-r--r-- | lib/jit_x86.c | 4 | ||||
-rw-r--r-- | lib/lightning.c | 6 |
@@ -1,3 +1,32 @@ +2023年03月07日 Paulo Andrade <pcpa@gnu.org> + + * check/alu_rot.tst, check/alu_rot.ok: New test files for the new + lrotr, lroti, rrotr and rroti instructions. + * check/Makefile.am, check/lightning.c, include/lightning.h.in, + lib/jit_names.c, lib/lightning.c, doc/body.texi: Update for the + new instructions. + * lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, lib/jit_arm-cpu.c, + lib/jit_arm.c: Implement optimized rrotr and rroti. lrotr and + lroti just adjust parameters for a left shift rotate. + * lib/jit_alpha-cpu.c, lib/jit_alpha.c, lib/jit_ia64-cpu.c, + lib/jit_ia64.c, lib/jit_riscv-cpu.c, lib/jit_riscv.c, + lib/jit_sparc-cpu.c, lib/jit_sparc.c: Implement calls to fallback lrotr, + lroti, rrotr and rroti. + * lib/jit_hppa-cpu.c, lib/jit_hppa.c: Implement optimized rroti. + Other instructions use fallbacks. + * lib/jit_loongarch-cpu.c, lib/jit_loongarch.c: Implement optimized + rrotr and rroti. lrotr and lroti just adapt arguments and use a + right shift. + * lib/jit_mips-cpu.c, lib/jit_mips.c: If mips2, implement optimized + rrotr and rroti. lrotr and lroti just adapt arguments and use a + right shift. If mips1 use fallbacks. + * lib/jit_ppc-cpu.c, lib/jit_ppc.c, lib/jit_s390-cpu.c, lib/jit_s390.c, + lib/jit_x86-cpu.c, lib/jit_x86.c: Implement optimized lrotr, + lroti, rrotr, rroti. + * lib/jit_fallback.c: Implement fallbacks for lrotr, lroti, + rrotr and rroti. Also add extra macro to avoid segfaults in s390, + which cannot use register zero for some addressing instructions. 
+ 2023年03月02日 Paulo Andrade <pcpa@gnu.org> * check/popcnt.tst, check/popcnt.ok: New test files for the new diff --git a/check/Makefile.am b/check/Makefile.am index f24430e..0a49ac9 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -94,6 +94,7 @@ EXTRA_DIST = \ alu_rsh.tst alu_rsh.ok \ alu_com.tst alu_com.ok \ alu_neg.tst alu_neg.ok \ + alu_rot.tst alu_rot.ok \ movzr.tst movzr.ok \ fop_abs.tst fop_abs.ok \ fop_sqrt.tst fop_sqrt.ok \ @@ -140,7 +141,7 @@ base_TESTS = \ alu_mul alu_div alu_rem \ alu_and alu_or alu_xor \ alu_lsh alu_rsh \ - alu_com alu_neg \ + alu_com alu_neg alu_rot \ movzr \ fop_abs fop_sqrt \ varargs stack \ @@ -168,7 +169,7 @@ x87_TESTS = \ alu_sub.x87 alux_sub.x87 alu_rsb.x87 \ alu_mul.x87 alu_div.x87 alu_rem.x87 \ alu_and.x87 alu_or.x87 alu_xor.x87 \ - alu_lsh.x87 alu_rsh.x87 \ + alu_lsh.x87 alu_rsh.x87 alu_rot.x87 \ alu_com.x87 alu_neg.x87 \ movzr.x87 \ fop_abs.x87 fop_sqrt.x87 \ @@ -192,7 +193,7 @@ x87_nodata_TESTS = \ alu_sub.x87.nodata alux_sub.x87.nodata alu_rsb.x87.nodata \ alu_mul.x87.nodata alu_div.x87.nodata alu_rem.x87.nodata \ alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata \ - alu_lsh.x87.nodata alu_rsh.x87.nodata \ + alu_lsh.x87.nodata alu_rsh.x87.nodata alu_rot.x87.nodata \ alu_com.x87.nodata alu_neg.x87.nodata \ movzr.x87.nodata \ fop_abs.x87.nodata fop_sqrt.x87.nodata \ @@ -218,7 +219,7 @@ arm_TESTS = \ alu_sub.arm alux_sub.arm alu_rsb.arm \ alu_mul.arm alu_div.arm alu_rem.arm \ alu_and.arm alu_or.arm alu_xor.arm \ - alu_lsh.arm alu_rsh.arm \ + alu_lsh.arm alu_rsh.arm alu_rot.arm \ alu_com.arm alu_neg.arm \ movzr.arm \ fop_abs.arm fop_sqrt.arm \ @@ -244,7 +245,7 @@ swf_TESTS = \ alu_sub.swf alux_sub.swf alu_rsb.swf \ alu_mul.swf alu_div.swf alu_rem.swf \ alu_and.swf alu_or.swf alu_xor.swf \ - alu_lsh.swf alu_rsh.swf \ + alu_lsh.swf alu_rsh.swf alu_rot.swf \ alu_com.swf alu_neg.swf \ movzr.swf \ fop_abs.swf fop_sqrt.swf \ @@ -268,7 +269,7 @@ arm_swf_TESTS = \ alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf \ 
alu_mul.arm.swf alu_div.arm.swf alu_rem.arm.swf \ alu_and.arm.swf alu_or.arm.swf alu_xor.arm.swf \ - alu_lsh.arm.swf alu_rsh.arm.swf \ + alu_lsh.arm.swf alu_rsh.arm.swf alu_rot.arm.swf \ alu_com.arm.swf alu_neg.arm.swf \ movzr.arm.swf \ fop_abs.arm.swf fop_sqrt.arm.swf \ @@ -293,7 +294,7 @@ arm4_swf_TESTS = \ alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf \ alu_mul.arm4.swf alu_div.arm4.swf alu_rem.arm4.swf \ alu_and.arm4.swf alu_or.arm4.swf alu_xor.arm4.swf \ - alu_lsh.arm4.swf alu_rsh.arm4.swf \ + alu_lsh.arm4.swf alu_rsh.arm4.swf alu_rot.arm4.swf \ alu_com.arm4.swf alu_neg.arm4.swf \ movzr.arm4.swf \ fop_abs.arm4.swf fop_sqrt.arm4.swf \ @@ -321,7 +322,7 @@ nodata_TESTS = \ alu_sub.nodata alux_sub.nodata alu_rsb.nodata \ alu_mul.nodata alu_div.nodata alu_rem.nodata \ alu_and.nodata alu_or.nodata alu_xor.nodata \ - alu_lsh.nodata alu_rsh.nodata \ + alu_lsh.nodata alu_rsh.nodata alu_rot.nodata \ alu_com.nodata alu_neg.nodata \ movzr.nodata \ fop_abs.nodata fop_sqrt.nodata \ diff --git a/check/all.tst b/check/all.tst index d24f7ae..db3b870 100644 --- a/check/all.tst +++ b/check/all.tst @@ -88,6 +88,11 @@ clzr %r0 %r1 ctor %r0 %r1 ctzr %r0 %r1 + popcntr %r0 %r1 + lrotr %r0 %r1 %r2 + lroti %r0 %r1 0x1f + rrotr %r0 %r1 %r2 + rroti %r0 %r1 0x1f ltr %r0 %r1 %r2 lti %r0 %r1 2 ltr_u %r0 %r1 %r2 diff --git a/check/alu_rot.ok b/check/alu_rot.ok new file mode 100644 index 0000000..9766475 --- /dev/null +++ b/check/alu_rot.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_rot.tst b/check/alu_rot.tst new file mode 100644 index 0000000..6ab7bf0 --- /dev/null +++ b/check/alu_rot.tst @@ -0,0 +1,39 @@ +#include "alu.inc" + +.code + prolog + +#define LROT(N, I0, I1, V) ALU(N, , lrot, I0, I1, V) + LROT(0, 0x7f, 1, 0xfe) +#if __WORDSIZE == 32 + LROT(1, 0xfffffffe, 31, 0x7fffffff) + LROT(2, 0x12345678, 11, 0xa2b3c091) + LROT(3, 0x80000001, 1, 0x03) +#else + LROT(1, 0xfffffffffffffffe, 31, 0xffffffff7fffffff) + LROT(2, 0x123456789abcdef0, 43, 0xe6f78091a2b3c4d5) + LROT(3, 
0x00000001ffffffff, 32, 0xffffffff00000001) + LROT(4, 0x80000001, 33, 0x200000001) + LROT(5, 0x8000000000, 35, 0x400) +#endif + +#define RROT(N, I0, I1, V) ALU(N, , rrot, I0, I1, V) + RROT(0, 0xfe, 1, 0x7f) +#if __WORDSIZE == 32 + RROT(1, 0xfffffffe, 31, 0xfffffffd) + RROT(2, 0x12345678, 11, 0xcf02468a) + RROT(3, 0x80000001, 3, 0x30000000) +#else + RROT(1, 0xfffffffffffffffe, 31, 0xfffffffdffffffff) + RROT(2, 0x123456789abcdef0, 43, 0xcf13579bde02468a) + RROT(3, 0x00000001ffffffff, 32, 0xffffffff00000001) + RROT(4, 0x80000001, 33, 0x4000000080000000) + RROT(5, 0x8000000000, 35, 0x10) +#endif + + prepare + pushargi ok + ellipsis + finishi @printf + ret + epilog diff --git a/check/lightning.c b/check/lightning.c index daa3786..19bf06f 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -320,6 +320,8 @@ static void xorr(void); static void xori(void); static void lshr(void); static void lshi(void); static void rshr(void); static void rshi(void); static void rshr_u(void); static void rshi_u(void); +static void lrotr(void); static void lroti(void); +static void rrotr(void); static void rroti(void); static void negr(void); static void comr(void); static void clor(void); static void clzr(void); static void ctor(void); static void ctzr(void); @@ -682,6 +684,8 @@ static instr_t instr_vector[] = { entry(lshr), entry(lshi), entry(rshr), entry(rshi), entry(rshr_u), entry(rshi_u), + entry(lrotr), entry(lroti), + entry(rrotr), entry(rroti), entry(negr), entry(comr), entry(clor), entry(clzr), entry(ctor), entry(ctzr), @@ -1537,6 +1541,8 @@ entry_ir_ir_ir(xorr) entry_ir_ir_im(xori) entry_ir_ir_ir(lshr) entry_ir_ir_im(lshi) entry_ir_ir_ir(rshr) entry_ir_ir_im(rshi) entry_ir_ir_ir(rshr_u) entry_ir_ir_im(rshi_u) +entry_ir_ir_ir(lrotr) entry_ir_ir_im(lroti) +entry_ir_ir_ir(rrotr) entry_ir_ir_im(rroti) entry_ir_ir(negr) entry_ir_ir(comr) entry_ir_ir(clor) entry_ir_ir(clzr) entry_ir_ir(ctor) entry_ir_ir(ctzr) diff --git a/doc/body.texi b/doc/body.texi index aec96f1..1c6d23e 100644 
--- a/doc/body.texi +++ b/doc/body.texi @@ -261,10 +261,18 @@ lshr O1 = O2 << O3 lshi O1 = O2 << O3 rshr _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} rshi _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} +lrotr O1 = (O2 << O3) | (O2 >> (WORDSIZE - O3)) +lroti O1 = (O2 << O3) | (O2 >> (WORDSIZE - O3)) +rrotr O1 = (O2 >> O3) | (O2 << (WORDSIZE - O3)) +rroti O1 = (O2 >> O3) | (O2 << (WORDSIZE - O3)) movzr O1 = O3 ? O1 : O2 movnr O1 = O3 ? O2 : O1 @end example +Note that @code{lrotr}, @code{lroti}, @code{rrotr} and @code{rroti} +are described as the fallback operation. These are bit shift/rotation +operations. + @item Four operand binary ALU operations These accept two result registers, and two operands; the last one can be an immediate. The first two arguments cannot be the same register. diff --git a/include/lightning.h.in b/include/lightning.h.in index 1aae759..f0c96e2 100644 --- a/include/lightning.h.in +++ b/include/lightning.h.in @@ -1062,6 +1062,13 @@ typedef enum { #define jit_popcntr(u,v) jit_new_node_ww(jit_code_popcntr,u,v) jit_code_popcntr, +#define jit_lrotr(u,v,w) jit_new_node_www(jit_code_lrotr,u,v,w) +#define jit_lroti(u,v,w) jit_new_node_www(jit_code_lroti,u,v,w) + jit_code_lrotr, jit_code_lroti, +#define jit_rrotr(u,v,w) jit_new_node_www(jit_code_rrotr,u,v,w) +#define jit_rroti(u,v,w) jit_new_node_www(jit_code_rroti,u,v,w) + jit_code_rrotr, jit_code_rroti, + jit_code_last_code } jit_code_t; diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index 99d8756..66aaa5f 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -297,6 +297,8 @@ typedef union { # define A64_LSL 0x1ac02000 # define A64_LSR 0x1ac02400 # define A64_ASR 0x1ac02800 +# define A64_RORV 0x1ac02c00 +# define A64_EXTR 0x13800000 # define A64_MUL 0x1b007c00 # define A64_SMULL 0x9b207c00 # define A64_SMULH 0x9b407c00 @@ -416,6 +418,9 @@ typedef union { # define ASRI(r0,r1,i0) 
SBFM(r0,r1,i0,63) # define LSR(Rd,Rn,Rm) oxxx(A64_LSR|XS,Rd,Rn,Rm) # define LSRI(r0,r1,i0) UBFM(r0,r1,i0,63) +# define RORV(Rd,Rn,Rm) oxxx(A64_RORV|XS,Rd,Rn,Rm) +# define EXTR(Rd,Rn,Rm,Im) oxxx6(A64_EXTR|XS|DS,Rm,Im,Rn,Rd) +# define ROR(Rd,Rn,Rm,Im) EXTR(Rd,Rn,Rm,Im) # define AND(Rd,Rn,Rm) oxxx(A64_AND|XS,Rd,Rn,Rm) /* actually should use oxxrs but logical_immediate returns proper encoding */ # define ANDI(Rd,Rn,Imm12) oxxi(A64_ANDI|XS,Rd,Rn,Imm12) @@ -514,6 +519,10 @@ static void _oxxxc(jit_state_t*,jit_int32_t,jit_int32_t, # define oxxx7(Op,Rt,Rt2,Rn,Simm7) _oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7) static void _oxxx7(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define oxxx6(Op,Rm,Imm6,Rn,Rd) _oxxx6(_jit,Op,Rm,Imm6,Rn,Rd) +static void _oxxx6(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); + # define nop(i0) _nop(_jit,i0) static void _nop(jit_state_t*,jit_int32_t); # define addr(r0,r1,r2) ADD(r0,r1,r2) @@ -584,6 +593,11 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define rshr_u(r0,r1,r2) LSR(r0,r1,r2) # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) +static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define lroti(r0,r1,i0) rroti(r0,r1,64-i0) +# define rrotr(r0,r1,r2) RORV(r0,r1,r2) +# define rroti(r0,r1,i0) ROR(r0,r1,r1,i0) # define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) @@ -1033,6 +1047,24 @@ _oxxx7(jit_state_t *_jit, jit_int32_t Op, } static void +_oxxx6(jit_state_t *_jit, jit_int32_t Op, + jit_int32_t Rm, jit_int32_t Imm6, jit_int32_t Rn, jit_int32_t Rd) +{ + instr_t i; + assert(!(Rm & ~0x1f)); + assert(!(Rn & ~0x1f)); + assert(!(Rd & ~0x1f)); + assert(Imm6 >= 0 && Imm6 <= 63); + assert(!(Op & ~0xffe0fc00)); + i.w = Op; + i.Rm.b = 
Rm; + i.imm6.b = Imm6; + i.Rn.b = Rn; + i.Rd.b = Rd; + ii(i.w); +} + +static void _nop(jit_state_t *_jit, jit_int32_t i0) { for (; i0 > 0; i0 -= 4) @@ -1381,6 +1413,22 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 != r1 && r0 != r2) { + rsbi(r0, r2, 64); + rrotr(r0, r1, r0); + } + else { + reg = jit_get_reg(jit_class_gpr); + rsbi(rn(reg), r2, 64); + rrotr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { CMPI(r2, 0); diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index f3fe712..b545bfe 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -1442,6 +1442,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(neg,); case_rr(com,); case_rr(clo,); diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index dd20661..b87719b 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -1035,6 +1035,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); +#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2) +#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0) +#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2) +#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0) + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rrr(and,); case_rrw(and,); case_rrr(or,); diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index c75c13f..c450679 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -224,6 +224,9 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define THUMB_ASRI 0x1000 # define THUMB2_ASRI 0xea4f0020 # define ARM_ROR 0x00000060 +# define THUMB_ROR 0x41c0 +# define THUMB2_ROR 0xfa60f000 +# define THUMB2_RORI 0xea4f0030 # define ARM_CMP 0x01500000 # define THUMB_CMP 
0x4280 # define THUMB_CMPX 0x4500 @@ -637,6 +640,13 @@ static void _tdmb(jit_state_t *_jit, int im); # define ASRI(rd,rn,im) CC_ASRI(ARM_CC_AL,rd,rn,im) # define T1_ASRI(rd,rm,im) is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd)) # define T2_ASRI(rd,rm,im) tshift(THUMB2_ASRI,rd,rm,im) +# define CC_ROR(cc,rd,rn,rm) CC_SHIFT(cc,ARM_ROR|ARM_R,rd,rm,rn,0) +# define ROR(rd,rn,rm) CC_ROR(ARM_CC_AL,rd,rn,rm) +# define T1_ROR(rdn,rm) is(THUMB_ROR|(_u3(rm)<<3)|_u3(rdn)) +# define T2_ROR(rd,rn,rm) torrr(THUMB2_ROR,rn,rd,rm) +# define CC_RORI(cc,rd,rn,im) CC_SHIFT(cc,ARM_ROR,rd,0,rn,im) +# define RORI(rd,rn,im) CC_RORI(ARM_CC_AL,rd,rn,im) +# define T2_RORI(rd,rm,im) tshift(THUMB2_RORI,rd,rm,im) # define CC_CMP(cc,rn,rm) corrr(cc,ARM_CMP,rn,0,rm) # define CMP(rn,rm) CC_CMP(ARM_CC_AL,rn,rm) # define T1_CMP(rn,rm) is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn)) @@ -1010,6 +1020,13 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) +static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define lroti(r0,r1,i0) rroti(r0,r1,32-i0) +# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) +static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ccr(ct,cf,r0,r1,r2) _ccr(_jit,ct,cf,r0,r1,r2) static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t); # define cci(ct,cf,r0,r1,i0) _cci(_jit,ct,cf,r0,r1,i0) @@ -2722,6 +2739,47 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 != r1 && r0 != r2) { + rsbi(r0, r2, 64); + rrotr(r0, r1, r0); + } + 
else { + reg = jit_get_reg(jit_class_gpr); + rsbi(rn(reg), r2, 64); + rrotr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_thumb_p()) { + if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1) + T1_ROR(r0, r2); + else + T2_ROR(r0, r1, r2); + } + else + ROR(r0, r1, r2); +} + +static void +_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + assert(i0 >= 0 && i0 <= 31); + if (i0 == 0) + movr(r0, r1); + else if (jit_thumb_p()) + T2_RORI(r0, r1, i0); + else + RORI(r0, r1, i0); +} + +static void _ccr(jit_state_t *_jit, int ct, int cf, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_arm.c b/lib/jit_arm.c index e64230f..953298f 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1580,6 +1580,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(neg,); case_rr(com,); case_rr(clo,); diff --git a/lib/jit_fallback.c b/lib/jit_fallback.c index 55bf7d3..bfc547b 100644 --- a/lib/jit_fallback.c +++ b/lib/jit_fallback.c @@ -27,6 +27,19 @@ static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t); static void _fallback_bitswap(jit_state_t*, jit_int32_t, jit_int32_t); #define fallback_popcnt(r0,r1) _fallback_popcnt(_jit, r0, r1) static void _fallback_popcnt(jit_state_t*, jit_int32_t, jit_int32_t); +#define fallback_lrotr(r0, r1, r2) _fallback_lrotr(_jit, r0, r1, r2) +static void _fallback_lrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t); +#define fallback_lroti(r0, r1, i0) _fallback_lroti(_jit, r0, r1, i0) +static void _fallback_lroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_rrotr(r0, r1, r2) _fallback_rrotr(_jit, r0, r1, r2) +static void _fallback_rrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t); +#define fallback_rroti(r0, r1, i0) _fallback_rroti(_jit, r0, r1, i0) 
+static void _fallback_rroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t); +# if defined(__s390__) || defined(__s390x__) +# define fallback_jit_get_reg(flags) jit_get_reg_but_zero(flags) +# else +# define fallback_jit_get_reg(flags) jit_get_reg(flags) +# endif # if defined(__ia64__) # define fallback_flush() sync() # elif defined(__mips__) @@ -198,7 +211,7 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, /* XXX only attempts to fallback cas for lightning jit code */ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; if ((iscasi = r1 == _NOREG)) { - r1_reg = jit_get_reg(jit_class_gpr|jit_class_sav); + r1_reg = fallback_jit_get_reg(jit_class_gpr|jit_class_sav); r1 = rn(r1_reg); movi(r1, i0); } @@ -273,8 +286,8 @@ _fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) }; jit_int32_t t0, t1; jit_word_t loop, done; - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); + t0 = fallback_jit_get_reg(jit_class_gpr); + t1 = fallback_jit_get_reg(jit_class_gpr); movi(rn(t0), __WORDSIZE - 8); fallback_flush(); loop = _jit->pc.w; @@ -299,9 +312,9 @@ _fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) clz = fallback_jmpi(_jit->pc.w); fallback_flush(); fallback_patch_bnei(l32, _jit->pc.w); - r2_reg = jit_get_reg(jit_class_gpr); + r2_reg = fallback_jit_get_reg(jit_class_gpr); r2 = rn(r2_reg); - r1_reg = jit_get_reg(jit_class_gpr); + r1_reg = fallback_jit_get_reg(jit_class_gpr); movr(rn(r1_reg), r1); r1 = rn(r1_reg); movi(r0, 0); @@ -392,7 +405,7 @@ _fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) }; /* return mod67[(-r1 & r1) % 67]; */ # endif - t0 = jit_get_reg(jit_class_gpr); + t0 = fallback_jit_get_reg(jit_class_gpr); if (r0 == r1) { negr(rn(t0), r1); andr(r0, rn(t0), r1); @@ -418,9 +431,9 @@ _fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) ctz = fallback_jmpi(_jit->pc.w); fallback_flush(); fallback_patch_bnei(l32, _jit->pc.w); - r2_reg = jit_get_reg(jit_class_gpr); + r2_reg = 
fallback_jit_get_reg(jit_class_gpr); r2 = rn(r2_reg); - r1_reg = jit_get_reg(jit_class_gpr); + r1_reg = fallback_jit_get_reg(jit_class_gpr); movr(rn(r1_reg), r1); r1 = rn(r1_reg); movi(r0, 0); @@ -524,16 +537,16 @@ _fallback_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) 31, 159, 95, 223, 63, 191, 127, 255 }; if (r0 == r1) { - t0 = jit_get_reg(jit_class_gpr); + t0 = fallback_jit_get_reg(jit_class_gpr); r1_reg = rn(t0); } else { t0 = JIT_NOREG; r1_reg = r1; } - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - t3 = jit_get_reg(jit_class_gpr); + t1 = fallback_jit_get_reg(jit_class_gpr); + t2 = fallback_jit_get_reg(jit_class_gpr); + t3 = fallback_jit_get_reg(jit_class_gpr); if (r0 == r1) movr(rn(t0), r1); extr_uc(rn(t1), r1_reg); @@ -572,9 +585,9 @@ v = ( v >> 16 ) | ( v << 16); */ jit_int32_t t0, t1, t2, t3, t4; movr(r0, r1); - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); + t0 = fallback_jit_get_reg(jit_class_gpr); + t1 = fallback_jit_get_reg(jit_class_gpr); + t2 = fallback_jit_get_reg(jit_class_gpr); movi(rn(t0), __WORDSIZE == 32 ? 
0x55555555L : 0x5555555555555555L); rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ @@ -631,20 +644,20 @@ while ((s >>= 1) > 0) jit_int32_t s, mask; jit_word_t loop, done, t0, t1; movr(v, r1); - s = jit_get_reg(jit_class_gpr); + s = fallback_jit_get_reg(jit_class_gpr); movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */ - mask = jit_get_reg(jit_class_gpr); + mask = fallback_jit_get_reg(jit_class_gpr); movi(rn(mask), ~0L); /* mask = ~0; */ flush(); loop = _jit->pc.w; /* while ((s >>= 1) > 0) */ rshi(rn(s), rn(s), 1); /* (s >>= 1) */ done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */ - t0 = jit_get_reg(jit_class_gpr); + t0 = fallback_jit_get_reg(jit_class_gpr); lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */ xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */ rshr(rn(t0), v, rn(s)); /* t0 = v >> s */ andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */ - t1 = jit_get_reg(jit_class_gpr); + t1 = fallback_jit_get_reg(jit_class_gpr); lshr(rn(t1), v, rn(s)); /* t1 = v << s */ comr(v, rn(mask)); /* v = ~mask */ andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */ @@ -690,16 +703,16 @@ _fallback_popcnt(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8 }; if (r0 == r1) { - t0 = jit_get_reg(jit_class_gpr); + t0 = fallback_jit_get_reg(jit_class_gpr); r1_reg = rn(t0); } else { t0 = JIT_NOREG; r1_reg = r1; } - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - t3 = jit_get_reg(jit_class_gpr); + t1 = fallback_jit_get_reg(jit_class_gpr); + t2 = fallback_jit_get_reg(jit_class_gpr); + t3 = fallback_jit_get_reg(jit_class_gpr); if (r0 == r1) movr(rn(t0), r1); extr_uc(rn(t1), r1_reg); @@ -720,4 +733,76 @@ _fallback_popcnt(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) if (t0 != JIT_NOREG) jit_unget_reg(t0); } + +static void +_fallback_lrotr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + /* r0 = (r1 << r2) | (r1 >> (__WORDSIZE - 
r2)) */ + jit_int32_t t0, t1; + t0 = fallback_jit_get_reg(jit_class_gpr); + if (r0 == r1 || r0 == r2) { + t1 = fallback_jit_get_reg(jit_class_gpr); + lshr(rn(t0), r1, r2); + rsbi(rn(t1), r2, __WORDSIZE); + rshr_u(rn(t1), r1, rn(t1)); + orr(r0, rn(t0), rn(t1)); + jit_unget_reg(t1); + } + else { + lshr(r0, r1, r2); + rsbi(rn(t0), r2, __WORDSIZE); + rshr_u(rn(t0), r1, rn(t0)); + orr(r0, r0, rn(t0)); + } + jit_unget_reg(t0); +} + +static void +_fallback_lroti(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + t0 = fallback_jit_get_reg(jit_class_gpr); + lshi(rn(t0), r1, i0); + rshi_u(r0, r1, __WORDSIZE - i0); + orr(r0, r0, rn(t0)); + jit_unget_reg(t0); +} + +static void +_fallback_rrotr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + /* r0 = (r1 >> r2) | (r1 << (__WORDSIZE - r2)) */ + jit_int32_t t0, t1; + t0 = fallback_jit_get_reg(jit_class_gpr); + if (r0 == r1 || r0 == r2) { + t1 = fallback_jit_get_reg(jit_class_gpr); + rshr_u(rn(t0), r1, r2); + rsbi(rn(t1), r2, __WORDSIZE); + lshr(rn(t1), r1, rn(t1)); + orr(r0, rn(t0), rn(t1)); + jit_unget_reg(t1); + } + else { + rshr_u(r0, r1, r2); + rsbi(rn(t0), r2, __WORDSIZE); + lshr(rn(t0), r1, rn(t0)); + orr(r0, r0, rn(t0)); + } + jit_unget_reg(t0); +} + +static void +_fallback_rroti(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + t0 = fallback_jit_get_reg(jit_class_gpr); + rshi_u(rn(t0), r1, i0); + lshi(r0, r1, __WORDSIZE - i0); + orr(r0, r0, rn(t0)); + jit_unget_reg(t0); +} #endif diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index 3ddf725..fb02efe 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -756,6 +756,13 @@ static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2) static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define rshi_u(r0,r1,i0) SHRWI_U(r1,i0,r0) +#define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) +static void 
_lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +#define lroti(r0,r1,i0) rroti(r0,r1,32-i0) +#define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +#define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) +static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define cmpr(c,r0,r1,r2) _cmpr(_jit,c,r0,r1,r2) static void _cmpr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,jit_int32_t); #define cmpi(c,ci,r0,r1,i0) _cmpi(_jit,c,ci,r0,r1,i0) @@ -2130,6 +2137,25 @@ _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) } static void +_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) +{ + fallback_lrotr(r0, r1, r2); +} + +static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) +{ + fallback_rrotr(r0, r1, r2); +} + +static void +_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_word_t i0) +{ + movr(r0, r1); + SHRPWI(r0, r0, i0, r0); +} + +static void _cmpr(jit_state_t *_jit, jit_word_t c, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index 8dca952..995d4b1 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -1038,6 +1038,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rrr(movn,); case_rrr(movz,); case jit_code_casr: @@ -1073,7 +1077,7 @@ _emit_code(jit_state_t *_jit) #define ctor(r0, r1) fallback_cto(r0, r1) #define ctzr(r0, r1) fallback_ctz(r0, r1) #define rbitr(r0, r1) fallback_bitswap(r0, r1) -#define popcntr(r0, r1) fallback_popcntr(r0, r1) +#define popcntr(r0, r1) fallback_popcnt(r0, r1) case_rr(clo,); case_rr(clz,); case_rr(cto,); diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index 34592c9..855ac44 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -1191,6 +1191,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); 
case_rrw(rsh, _u); +#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2) +#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0) +#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2) +#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0) + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(neg,); case_rr(com,); case_rr(clo,); diff --git a/lib/jit_loongarch-cpu.c b/lib/jit_loongarch-cpu.c index 2de50c4..aceff17 100644 --- a/lib/jit_loongarch-cpu.c +++ b/lib/jit_loongarch-cpu.c @@ -420,6 +420,11 @@ static void _remi_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define rshi(r0, r1, i0) SRAI_D(r0, r1, i0) # define rshr_u(r0, r1, r2) SRL_D(r0, r1, r2) # define rshi_u(r0, r1, i0) SRLI_D(r0, r1, i0) +# define lrotr(r0, r1, r2) _lrotr(_jit, r0, r1, r2) +static void _lrotr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define lroti(r0, r1, i0) rroti(r0, r1, __WORDSIZE - i0) +# define rrotr(r0, r1, r2) ROTR_D(r0, r1, r2) +# define rroti(r0, r1, i0) ROTRI_D(r0, r1, i0) # define andr(r0, r1, r2) AND(r0, r1, r2) # define andi(r0, r1, i0) _andi(_jit, r0, r1, i0) static void _andi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); @@ -1331,6 +1336,22 @@ _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 != r1 && r0 != r2) { + rsbi(r0, r2, __WORDSIZE); + rrotr(r0, r1, r0); + } + else { + reg = jit_get_reg(jit_class_gpr); + rsbi(rn(reg), r2, __WORDSIZE); + rrotr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_loongarch.c b/lib/jit_loongarch.c index 0718dfa..09fa541 100644 --- a/lib/jit_loongarch.c +++ b/lib/jit_loongarch.c @@ -1111,6 +1111,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); 
+ case_rrw(rrot,); case_rr(neg,); case_rr(com,); case_rr(clo,); @@ -1118,7 +1122,7 @@ _emit_code(jit_state_t *_jit) case_rr(cto,); case_rr(ctz,); case_rr(rbit,); -#define popcntr(r0, r1) fallback_popcntr(r0, r1) +#define popcntr(r0, r1) fallback_popcnt(r0, r1) case_rr(popcnt,); case_rrr(and,); case_rrw(and,); diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 19d34a2..69f7ed0 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -413,7 +413,10 @@ static void _nop(jit_state_t*,jit_int32_t); # define DEXTU(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos-32,MIPS_DEXTU) # define DEXTM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-32-1,pos,MIPS_DEXTM) # define ROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL) +# define ROTRV(rd,rt,rs) hrrrit(MIPS_SPECIAL,rs,rt,rd,1,MIPS_SRLV) # define DROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL) +# define DROTR32(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL32) +# define DROTRV(rd,rt,rs) hrrrit(MIPS_SPECIAL,rs,rt,rd,1,MIPS_DSRLV) # define SYNC() rrr_t(_ZERO_REGNO,_ZERO_REGNO,_ZERO_REGNO,MIPS_SYNC) # define MFHI(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI) # define MFLO(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFLO) @@ -620,6 +623,13 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # endif +# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) +static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define lroti(r0,r1,i0) rroti(r0,r1,__WORDSIZE-i0) +# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) +static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define andr(r0,r1,r2) AND(r0,r1,r2) # define andi(r0,r1,i0) _andi(_jit,r0,r1,i0) static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -2199,6 
+2209,58 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) #endif static void +_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* r0 = r1 rotated left by r2 bits, emitted as a right rotate by __WORDSIZE - r2 */ + jit_int32_t reg; + if (jit_mips2_p()) { + if (r0 != r1 && r0 != r2) { /* r0 aliases no input, so it can hold the adjusted count */ + rsbi(r0, r2, __WORDSIZE); + rrotr(r0, r1, r0); + } + else { /* r0 aliases an input: build the adjusted count in a scratch register */ + reg = jit_get_reg(jit_class_gpr); + rsbi(rn(reg), r2, __WORDSIZE); + rrotr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + } + else + fallback_lrotr(r0, r1, r2); +} + +static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* r0 = r1 rotated right by r2 bits; falls back when jit_mips2_p() is false */ + if (jit_mips2_p()) { +#if __WORDSIZE == 32 + ROTRV(r0, r1, r2); +#else + DROTRV(r0, r1, r2); +#endif + } + else + fallback_rrotr(r0, r1, r2); +} + +static void +_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ /* r0 = r1 rotated right by the constant i0, which must be in 0 .. __WORDSIZE - 1 */ + assert(i0 >= 0 && i0 <= __WORDSIZE - 1); + if (jit_mips2_p()) { +#if __WORDSIZE == 32 + ROTR(r0, r1, i0); +#else + if (i0 < 32) + DROTR(r0, r1, i0); + else + DROTR32(r0, r1, i0 - 32); /* counts 32..63 are passed to DROTR32 as i0 - 32 */ +#endif + } + else + fallback_rroti(r0, r1, i0); /* must be the right-rotate fallback so both paths rotate in the same direction */ +} + +static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_mips.c b/lib/jit_mips.c index e5ae9e5..aca8fa3 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -1560,6 +1560,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -1677,7 +1681,7 @@ _emit_code(jit_state_t *_jit) case_rr(cto,); case_rr(ctz,); case_rr(rbit,); -#define popcntr(r0, r1) fallback_popcntr(r0, r1) +#define popcntr(r0, r1) fallback_popcnt(r0, r1) case_rr(popcnt,); case_rrr(lt,); case_rrw(lt,); diff --git a/lib/jit_names.c b/lib/jit_names.c index 7d7a8a8..da19f5f 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -257,4 +257,6 @@ static char *code_name[] = { "ctor", "ctzr", "rbitr", "popcntr", + "lrotr", "lroti", + "rrotr", "rroti", };
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index f18bb75..4de46b3 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -669,6 +669,17 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # endif # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# if __WORDSIZE == 32 +# define lrotr(r0,r1,r2) ROTLW(r0,r1,r2) /* register-count left rotate maps directly onto one emitter macro per word size */ +# else +# define lrotr(r0,r1,r2) ROTLD(r0,r1,r2) +# endif +# define lroti(r0,r1,i0) _lroti(_jit,r0,r1,i0) +static void _lroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) +static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ltr(r0,r1,r2) _ltr(_jit,r0,r1,r2) static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define lti(r0,r1,i0) _lti(_jit,r0,r1,i0) @@ -1689,6 +1700,50 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_lroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ /* r0 = r1 rotated left by the constant i0 */ + if (i0 == 0) /* a zero count is emitted as a plain register move */ + movr(r0, r1); + else { +# if __WORDSIZE == 32 + ROTLWI(r0, r1, i0); +# else + RLDICL(r0, r1, i0, 0); +# endif + } +} + +static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* r0 = r1 rotated right by r2 bits, emitted as lrotr with a count of __WORDSIZE - r2 */ + jit_int32_t reg; + if (r0 != r1 && r0 != r2) { /* r0 aliases no input, so it can hold the adjusted count */ + rsbi(r0, r2, __WORDSIZE); + lrotr(r0, r1, r0); + } + else { /* r0 aliases an input: build the adjusted count in a scratch register */ + reg = jit_get_reg(jit_class_gpr); + rsbi(rn(reg), r2, __WORDSIZE); + lrotr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ /* r0 = r1 rotated right by the constant i0 */ + if (i0 == 0) /* a zero count is emitted as a plain register move */ + movr(r0, r1); + else { +# if __WORDSIZE == 32 + ROTRWI(r0, r1, i0); +# else + RLDICL(r0, r1, 64 - i0, 0); /* same emitter as _lroti, with the complementary count 64 - i0 */ +# endif + } +} + +static void _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { CMPX(r1, r2); diff --git a/lib/jit_ppc.c
b/lib/jit_ppc.c index fca1b47..23ac66e 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -1351,6 +1351,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c index ce27737..f6e5f4d 100644 --- a/lib/jit_riscv.c +++ b/lib/jit_riscv.c @@ -1162,6 +1162,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); +#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2) /* all four rotate operations are routed to the fallback_* implementations */ +#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0) +#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2) +#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0) + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(neg,); case_rr(com,); #define clor(r0, r1) fallback_clo(r0, r1) diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c index 0718938..b49940b 100644 --- a/lib/jit_s390-cpu.c +++ b/lib/jit_s390-cpu.c @@ -1078,9 +1078,16 @@ static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # endif # if __WORDSIZE == 32 # define negr(r0,r1) LCR(r0,r1) +# define lrotr(r0,r1,r2) RLL(r0,r1,0,r2) /* register count: r2 goes in the last RLL argument, with 0 in the third */ +# define lroti(r0,r1,i0) RLL(r0,r1,i0,0) /* constant count: i0 goes in the third RLL argument, with 0 in the last */ # else # define negr(r0,r1) LCGR(r0,r1) +# define lrotr(r0,r1,r2) RLLG(r0,r1,0,r2) +# define lroti(r0,r1,i0) RLLG(r0,r1,i0,0) # endif +# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rroti(r0,r1,i0) lroti(r0,r1,__WORDSIZE-i0) /* right rotate by i0 expressed as a left rotate by __WORDSIZE-i0; NOTE(review): i0 is not parenthesized in the expansion */ # define clor(r0, r1) _clor(_jit, r0, r1) static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); # define clzr(r0, r1) _clzr(_jit, r0, r1) @@ -2992,6 +2999,22 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) #endif static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ /* r0 = r1 rotated right by r2 bits, emitted as lrotr with a count of __WORDSIZE - r2 */ + jit_int32_t reg; + if (r0 != r1 && r0 != r2) { /* r0 aliases no input, so it can hold the adjusted count */ + rsbi(r0, r2, __WORDSIZE); +
lrotr(r0, r1, r0); + } + else { /* r0 aliases an input: build the adjusted count in a scratch register */ + reg = jit_get_reg(jit_class_gpr); /* NOTE(review): this register becomes the last RLL/RLLG operand via lrotr; the changelog says s390 cannot use register zero for some addressing instructions, so confirm this allocation can never return register 0 */ + rsbi(rn(reg), r2, __WORDSIZE); + lrotr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { #if CHECK_FLOGR diff --git a/lib/jit_s390.c b/lib/jit_s390.c index 0e5b356..df89882 100644 --- a/lib/jit_s390.c +++ b/lib/jit_s390.c @@ -1131,6 +1131,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(neg,); case_rr(com,); case_rr(clo,); diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index a60400d..18a3c4e 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -1431,6 +1431,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); +#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2) /* all four rotate operations are routed to the fallback_* implementations */ +#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0) +#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2) +#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0) + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(trunc, _f_i); case_rr(trunc, _d_i); #if __WORDSIZE == 64 diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 4d9e529..6ba594e 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -289,6 +289,10 @@ static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define rshi(r0, r1, i0) rotshi(X86_SAR, r0, r1, i0) # define rshr_u(r0, r1, r2) rotshr(X86_SHR, r0, r1, r2) # define rshi_u(r0, r1, i0) rotshi(X86_SHR, r0, r1, i0) +# define lrotr(r0, r1, r2) rotshr(X86_ROL, r0, r1, r2) /* rotates go through the same rotshr/rotshi helpers as the shifts above, selected by X86_ROL / X86_ROR */ +# define lroti(r0, r1, i0) rotshi(X86_ROL, r0, r1, i0) +# define rrotr(r0, r1, r2) rotshr(X86_ROR, r0, r1, r2) +# define rroti(r0, r1, i0) rotshi(X86_ROR, r0, r1, i0) # define unr(code, r0) _unr(_jit, code, r0) static void _unr(jit_state_t*, jit_int32_t, jit_int32_t); # define inegr(r0) unr(X86_NEG, r0) diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 334a11e..1cb0356 100644 ---
a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -1803,6 +1803,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(neg,); case_rr(com,); case_rr(clo,); diff --git a/lib/lightning.c b/lib/lightning.c index b2eb9ab..f0ab28a 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1487,7 +1487,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_muli: case jit_code_divi: case jit_code_divi_u: case jit_code_remi: case jit_code_remi_u: case jit_code_andi: case jit_code_ori: case jit_code_xori: case jit_code_lshi: - case jit_code_rshi: case jit_code_rshi_u: case jit_code_lti: + case jit_code_rshi: case jit_code_rshi_u: case jit_code_lroti: + case jit_code_rroti: case jit_code_lti: /* the immediate rotates join the case list already shared by the immediate shifts */ case jit_code_lti_u: case jit_code_lei: case jit_code_lei_u: case jit_code_eqi: case jit_code_gei: case jit_code_gei_u: case jit_code_gti: case jit_code_gti_u: case jit_code_nei: @@ -1524,7 +1525,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_mulr: case jit_code_divr: case jit_code_divr_u: case jit_code_remr: case jit_code_remr_u: case jit_code_andr: case jit_code_orr: case jit_code_xorr: case jit_code_lshr: - case jit_code_rshr: case jit_code_rshr_u: case jit_code_ltr: + case jit_code_rshr: case jit_code_rshr_u: case jit_code_lrotr: + case jit_code_rrotr: case jit_code_ltr: /* the register rotates join the case list already shared by the register shifts */ case jit_code_ltr_u: case jit_code_ler: case jit_code_ler_u: case jit_code_eqr: case jit_code_ger: case jit_code_ger_u: case jit_code_gtr: case jit_code_gtr_u: case jit_code_ner: |