Implement new bit rotate instructions. - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
path: root/lib/jit_aarch64-cpu.c
diff options
context:
space:
mode:
author: pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-03-07 18:05:11 -0300
committer: pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-03-07 18:05:11 -0300
commit: a0d09a9548c613aa8a3ef90d362cf2f5e5f97a6f (patch)
tree: 07955115b1818a18a063e99692d7ffe79caf6870 /lib/jit_aarch64-cpu.c
parent: 87139e0f6c0c24db1458f5b7aca25f13bc4b6ac6 (diff)
download: lightning-a0d09a9548c613aa8a3ef90d362cf2f5e5f97a6f.tar.gz
Implement new bit rotate instructions.
This commit also corrects some previous changes that were not properly tested and were failing to compile or having runtime problems, like using register 0 for addressing in s390. The s390 code still needs to be tested on actual hardware, as it fails in Hercules even though the rotates have the same encoding as shifts. For the moment, presume it is a bug in the Hercules emulator. * check/alu_rot.tst, check/alu_rot.ok: New test files for the new lrotr, lroti, rrotr and rroti instructions. * check/Makefile.am, check/lightning.c, include/lightning.h.in, lib/jit_names.c, lib/lightning.c, doc/body.texi: Update for the new instructions. * lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, lib/jit_arm-cpu.c, lib/jit_arm.c: Implement optimized rrotr and rroti. lrotr and lroti just adjust parameters for a left shift rotate. * lib/jit_alpha-cpu.c, lib/jit_alpha.c, lib/jit_ia64-cpu.c, lib/jit_ia64.c, lib/jit_riscv-cpu.c, lib/jit_riscv.c, lib/jit_sparc-cpu.c, lib/jit_sparc.c: Implement calls to fallback lrotr, lroti, rrotr and rroti. * lib/jit_hppa-cpu.c, lib/jit_hppa.c: Implement optimized rroti. Other instructions use fallbacks. * lib/jit_loongarch-cpu.c, lib/jit_loongarch.c: Implement optimized rrotr and rroti. lrotr and lroti just adapt arguments and use a right shift. * lib/jit_mips-cpu.c, lib/jit_mips.c: If mips2, implement optimized rrotr and rroti. lrotr and lroti just adapt arguments and use a right shift. If mips1, use fallbacks. * lib/jit_ppc-cpu.c, lib/jit_ppc.c, lib/jit_s390-cpu.c, lib/jit_s390.c, lib/jit_x86-cpu.c, lib/jit_x86.c: Implement optimized lrotr, lroti, rrotr and rroti. * lib/jit_fallback.c: Implement fallbacks for lrotr, lroti, rrotr and rroti. Also add an extra macro to avoid segfaults in s390, which cannot use register zero for some addressing instructions.
Diffstat (limited to 'lib/jit_aarch64-cpu.c')
-rw-r--r-- lib/jit_aarch64-cpu.c 48
1 files changed, 48 insertions, 0 deletions
diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c
index 99d8756..66aaa5f 100644
--- a/lib/jit_aarch64-cpu.c
+++ b/lib/jit_aarch64-cpu.c
@@ -297,6 +297,8 @@ typedef union {
# define A64_LSL 0x1ac02000
# define A64_LSR 0x1ac02400
# define A64_ASR 0x1ac02800
+# define A64_RORV 0x1ac02c00 /* base opcode OR'ed with XS by the RORV() macro */
+# define A64_EXTR 0x13800000 /* base opcode OR'ed with XS|DS by the EXTR() macro */
# define A64_MUL 0x1b007c00
# define A64_SMULL 0x9b207c00
# define A64_SMULH 0x9b407c00
@@ -416,6 +418,9 @@ typedef union {
# define ASRI(r0,r1,i0) SBFM(r0,r1,i0,63)
# define LSR(Rd,Rn,Rm) oxxx(A64_LSR|XS,Rd,Rn,Rm)
# define LSRI(r0,r1,i0) UBFM(r0,r1,i0,63)
+# define RORV(Rd,Rn,Rm) oxxx(A64_RORV|XS,Rd,Rn,Rm) /* three-register form; rrotr() maps directly onto this */
+# define EXTR(Rd,Rn,Rm,Im) oxxx6(A64_EXTR|XS|DS,Rm,Im,Rn,Rd) /* note: arguments are reordered to oxxx6's (Rm,Imm6,Rn,Rd) order */
+# define ROR(Rd,Rn,Rm,Im) EXTR(Rd,Rn,Rm,Im) /* plain alias of EXTR; rroti() passes the same register as Rn and Rm */
# define AND(Rd,Rn,Rm) oxxx(A64_AND|XS,Rd,Rn,Rm)
/* actually should use oxxrs but logical_immediate returns proper encoding */
# define ANDI(Rd,Rn,Imm12) oxxi(A64_ANDI|XS,Rd,Rn,Imm12)
@@ -514,6 +519,10 @@ static void _oxxxc(jit_state_t*,jit_int32_t,jit_int32_t,
# define oxxx7(Op,Rt,Rt2,Rn,Simm7) _oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7)
static void _oxxx7(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define oxxx6(Op,Rm,Imm6,Rn,Rd) _oxxx6(_jit,Op,Rm,Imm6,Rn,Rd) /* convenience wrapper: supplies the _jit state argument */
+static void _oxxx6(jit_state_t*,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); /* arguments after the state: Op, Rm, Imm6, Rn, Rd */
+
# define nop(i0) _nop(_jit,i0)
static void _nop(jit_state_t*,jit_int32_t);
# define addr(r0,r1,r2) ADD(r0,r1,r2)
@@ -584,6 +593,11 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define rshr_u(r0,r1,r2) LSR(r0,r1,r2)
# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) /* left rotate by a register count; implemented in _lrotr() */
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define lroti(r0,r1,i0) rroti(r0,r1,((64-(i0))&63)) /* left rotate by i0 == right rotate by (64-i0) mod 64; the mask keeps i0 == 0 within the 0..63 range asserted by _oxxx6() */
+# define rrotr(r0,r1,r2) RORV(r0,r1,r2) /* right rotate by a register count */
+# define rroti(r0,r1,i0) ROR(r0,r1,r1,i0) /* right rotate by an immediate: ROR/EXTR with both source operands set to r1 */
# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
@@ -1033,6 +1047,24 @@ _oxxx7(jit_state_t *_jit, jit_int32_t Op,
}
static void /* Build an instruction word from Op plus the Rm, imm6, Rn and Rd fields and pass it to ii(). */
+_oxxx6(jit_state_t *_jit, jit_int32_t Op,
+ jit_int32_t Rm, jit_int32_t Imm6, jit_int32_t Rn, jit_int32_t Rd)
+{
+ instr_t i;
+ assert(!(Rm & ~0x1f)); /* each register number must fit in 5 bits */
+ assert(!(Rn & ~0x1f));
+ assert(!(Rd & ~0x1f));
+ assert(Imm6 >= 0 && Imm6 <= 63); /* immediate must fit in 6 bits */
+ assert(!(Op & ~0xffe0fc00)); /* Op may not set any bit outside the mask 0xffe0fc00 */
+ i.w = Op; /* start from the opcode template, then fill in the operand fields */
+ i.Rm.b = Rm;
+ i.imm6.b = Imm6;
+ i.Rn.b = Rn;
+ i.Rd.b = Rd;
+ ii(i.w); /* hand the assembled word to ii() */
+}
+
+static void
_nop(jit_state_t *_jit, jit_int32_t i0)
{
for (; i0 > 0; i0 -= 4)
@@ -1381,6 +1413,22 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void /* Left rotate of r1 by the count in r2 into r0, expressed as rrotr() with a count derived from r2 and 64. */
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_int32_t reg;
+ if (r0 != r1 && r0 != r2) { /* r0 aliases neither input, so it can hold the adjusted count */
+ rsbi(r0, r2, 64); /* presumably r0 = 64 - r2; rsbi is defined elsewhere -- confirm */
+ rrotr(r0, r1, r0);
+ }
+ else { /* r0 overlaps an input: build the adjusted count in a temporary register instead */
+ reg = jit_get_reg(jit_class_gpr);
+ rsbi(rn(reg), r2, 64);
+ rrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg); /* release the temporary obtained above */
+ }
+}
+
+static void
_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
CMPI(r2, 0);
generated by cgit v1.2.3 (git 2.39.1) at 2025年09月21日 00:17:50 +0000

AltStyle によって変換されたページ (->オリジナル) /