Implement new bit rotate instructions. - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
path: root/lib/jit_arm-cpu.c
diff options
context:
space:
mode:
author pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023年03月07日 18:05:11 -0300
committer pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023年03月07日 18:05:11 -0300
commit a0d09a9548c613aa8a3ef90d362cf2f5e5f97a6f (patch)
tree 07955115b1818a18a063e99692d7ffe79caf6870 /lib/jit_arm-cpu.c
parent 87139e0f6c0c24db1458f5b7aca25f13bc4b6ac6 (diff)
download lightning-a0d09a9548c613aa8a3ef90d362cf2f5e5f97a6f.tar.gz
Implement new bit rotate instructions.
This commit also corrects some previous changes that were not properly tested and were failing to compile or having runtime problems, like using register 0 for addressing in s390. Still need to test on actual s390, as it fails in Hercules, but has the same encoding as shifts. For the moment presume it is a bug in the Hercules emulator. * check/alu_rot.tst, check/alu_rot.ok: New test files for the new lrotr, lroti, rrotr and rroti instructions. * check/Makefile.am, check/lightning.c, include/lightning.h.in, lib/jit_names.c, lib/lightning.c, doc/body.texi: Update for the new instructions. * lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, lib/jit_arm-cpu.c, lib/jit_arm.c: Implement optimized rrotr and rroti. lrotr and lroti just adjust parameters for a left shift rotate. * lib/jit_alpha-cpu.c, lib/jit_alpha.c, lib/jit_ia64-cpu.c, lib/jit_ia64.c, lib/jit_riscv-cpu.c, lib/jit_riscv.c, lib/jit_sparc-cpu.c, lib/jit_sparc.c: Implement calls to fallback lrotr, lroti, rrotr and rroti. * lib/jit_hppa-cpu.c, lib/jit_hppa.c: Implement optimized rroti. Other instructions use fallbacks. * lib/jit_loongarch-cpu.c, lib/jit_loongarch.c: Implement optimized rrotr and rroti. lrotr and lroti just adapt arguments and use a right shift. * lib/jit_mips-cpu.c, lib/jit_mips.c: If mips2, implement optimized rrotr and rroti. lrotr and lroti just adapt arguments and use a right shift. If mips1 use fallbacks. * lib/jit_ppc-cpu.c, lib/jit_ppc.c, lib/jit_s390-cpu.c, lib/jit_s390.c, lib/jit_x86-cpu.c, lib/jit_x86.c: Implement optimized lrotr, lroti, rrotr, rroti. * lib/jit_fallback.c: Implement fallbacks for lrotr, lroti, rrotr and rroti. Also add extra macro to avoid segfaults in s390, that cannot use register zero for some addressing instructions.
Diffstat (limited to 'lib/jit_arm-cpu.c')
-rw-r--r-- lib/jit_arm-cpu.c 58
1 files changed, 58 insertions, 0 deletions
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index c75c13f..c450679 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -224,6 +224,9 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned);
# define THUMB_ASRI 0x1000
# define THUMB2_ASRI 0xea4f0020
# define ARM_ROR 0x00000060
+# define THUMB_ROR 0x41c0 /* base opcode for T1_ROR; the two 3-bit register fields are OR'd in by that macro */
+# define THUMB2_ROR 0xfa60f000 /* base opcode for T2_ROR (rotate count taken from a register), encoded via torrr() */
+# define THUMB2_RORI 0xea4f0030 /* base opcode for T2_RORI (immediate rotate count), encoded via tshift() */
# define ARM_CMP 0x01500000
# define THUMB_CMP 0x4280
# define THUMB_CMPX 0x4500
@@ -637,6 +640,13 @@ static void _tdmb(jit_state_t *_jit, int im);
# define ASRI(rd,rn,im) CC_ASRI(ARM_CC_AL,rd,rn,im)
# define T1_ASRI(rd,rm,im) is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
# define T2_ASRI(rd,rm,im) tshift(THUMB2_ASRI,rd,rm,im)
+# define CC_ROR(cc,rd,rn,rm) CC_SHIFT(cc,ARM_ROR|ARM_R,rd,rm,rn,0) /* conditional rotate right, count in a register; note rm/rn are swapped when handed to CC_SHIFT; ARM_R presumably selects the register-count form -- TODO confirm */
+# define ROR(rd,rn,rm) CC_ROR(ARM_CC_AL,rd,rn,rm) /* CC_ROR with the ARM_CC_AL condition */
+# define T1_ROR(rdn,rm) is(THUMB_ROR|(_u3(rm)<<3)|_u3(rdn)) /* two-operand form: rdn is both source and destination; both registers must fit 3 bits (_u3) */
+# define T2_ROR(rd,rn,rm) torrr(THUMB2_ROR,rn,rd,rm) /* three-operand form; torrr() receives the registers in (rn, rd, rm) order */
+# define CC_RORI(cc,rd,rn,im) CC_SHIFT(cc,ARM_ROR,rd,0,rn,im) /* conditional rotate right by the immediate im */
+# define RORI(rd,rn,im) CC_RORI(ARM_CC_AL,rd,rn,im) /* CC_RORI with the ARM_CC_AL condition */
+# define T2_RORI(rd,rm,im) tshift(THUMB2_RORI,rd,rm,im) /* immediate rotate right, encoded like T2_ASRI via tshift() */
# define CC_CMP(cc,rn,rm) corrr(cc,ARM_CMP,rn,0,rm)
# define CMP(rn,rm) CC_CMP(ARM_CC_AL,rn,rm)
# define T1_CMP(rn,rm) is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn))
@@ -1010,6 +1020,13 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) /* rotate left, count in register r2 */
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define lroti(r0,r1,i0) rroti(r0,r1,(32-(i0))&31) /* rotate left by i0 == rotate right by (32-i0) mod 32; i0 is parenthesized and the result masked so that i0 == 0 maps to 0 rather than 32, which the 0..31 assert in _rroti would reject */
+# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) /* rotate right, count in register r2 */
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) /* rotate right by immediate i0 (0..31) */
+static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define ccr(ct,cf,r0,r1,r2) _ccr(_jit,ct,cf,r0,r1,r2)
static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
# define cci(ct,cf,r0,r1,i0) _cci(_jit,ct,cf,r0,r1,i0)
@@ -2722,6 +2739,47 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
static void /* rotate r1 left by the count in r2, leaving the result in r0; emitted as a right rotate by an adjusted count */
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_int32_t reg; /* scratch register handle; only used in the else branch */
+ if (r0 != r1 && r0 != r2) { /* r0 aliases neither input, so it can hold the adjusted count itself */
+ rsbi(r0, r2, 64); /* presumably r0 = 64 - r2 (reverse subtract) -- TODO confirm. NOTE(review): lroti uses 32, not 64, for this target; likely equivalent if only the low bits of the count are used -- confirm */
+ rrotr(r0, r1, r0); /* right rotate of r1 by the adjusted count */
+ }
+ else { /* r0 overlaps r1 or r2: writing the count into r0 would clobber an input */
+ reg = jit_get_reg(jit_class_gpr); /* borrow a general purpose register for the count */
+ rsbi(rn(reg), r2, 64);
+ rrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg); /* hand the scratch register back */
+ }
+}
+
+static void /* emit a rotate right of r1 by the count in r2, result in r0 */
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ if (jit_thumb_p()) { /* Thumb code generation: choose between the T1 and T2 encodings */
+ if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1) /* T1_ROR needs 3-bit register numbers and a shared source/destination; NOTE(review): the jit_no_set_flags() test suggests the short form alters flags -- confirm */
+ T1_ROR(r0, r2);
+ else
+ T2_ROR(r0, r1, r2); /* general three-operand form */
+ }
+ else
+ ROR(r0, r1, r2); /* non-Thumb path */
+}
+
+static void /* emit a rotate right of r1 by the immediate i0, result in r0 */
+_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ assert(i0 >= 0 && i0 <= 31); /* only rotate counts 0..31 are accepted */
+ if (i0 == 0)
+ movr(r0, r1); /* a zero rotate is emitted as a plain register move instead of a rotate instruction */
+ else if (jit_thumb_p())
+ T2_RORI(r0, r1, i0); /* Thumb path */
+ else
+ RORI(r0, r1, i0); /* non-Thumb path */
+}
+
+static void
_ccr(jit_state_t *_jit, int ct, int cf,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
generated by cgit v1.2.3 (git 2.39.1) at 2025年09月13日 01:39:28 +0000

AltStyle によって変換されたページ (->オリジナル) /