From a0d09a9548c613aa8a3ef90d362cf2f5e5f97a6f Mon Sep 17 00:00:00 2001
From: pcpa
Date: Tue, 7 Mar 2023 18:05:11 -0300
Subject: Implement new bit rotate instructions.

This commit also corrects some previous changes that were not properly
tested and were failing to compile or having runtime problems, like
using register 0 for addressing in s390.

This still needs to be tested on actual s390 hardware, as it fails in
Hercules even though the rotates have the same encoding as the shifts.
For the moment, presume it is a bug in the Hercules emulator.

	* check/alu_rot.tst, check/alu_rot.ok: New test files for the new
	lrotr, lroti, rrotr and rroti instructions.

	* check/Makefile.am, check/lightning.c, include/lightning.h.in,
	lib/jit_names.c, lib/lightning.c, doc/body.texi: Update for the
	new instructions.

	* lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, lib/jit_arm-cpu.c,
	lib/jit_arm.c: Implement optimized rrotr and rroti. lrotr and lroti
	just adjust parameters for a left shift rotate.

	* lib/jit_alpha-cpu.c, lib/jit_alpha.c, lib/jit_ia64-cpu.c,
	lib/jit_ia64.c, lib/jit_riscv-cpu.c, lib/jit_riscv.c,
	lib/jit_sparc-cpu.c, lib/jit_sparc.c: Implement calls to fallback
	lrotr, lroti, rrotr and rroti.

	* lib/jit_hppa-cpu.c, lib/jit_hppa.c: Implement optimized rroti.
	Other instructions use fallbacks.

	* lib/jit_loongarch-cpu.c, lib/jit_loongarch.c: Implement optimized
	rrotr and rroti. lrotr and lroti just adapt arguments and use a
	right shift.

	* lib/jit_mips-cpu.c, lib/jit_mips.c: If mips2, implement optimized
	rrotr and rroti. lrotr and lroti just adapt arguments and use a
	right shift. If mips1, use fallbacks.

	* lib/jit_ppc-cpu.c, lib/jit_ppc.c, lib/jit_s390-cpu.c,
	lib/jit_s390.c, lib/jit_x86-cpu.c, lib/jit_x86.c: Implement
	optimized lrotr, lroti, rrotr, rroti.

	* lib/jit_fallback.c: Implement fallbacks for lrotr, lroti, rrotr
	and rroti. Also add an extra macro to avoid segfaults in s390, which
	cannot use register zero for some addressing instructions.
---
 lib/jit_mips-cpu.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

(limited to 'lib/jit_mips-cpu.c')

diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 19d34a2..69f7ed0 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -413,7 +413,10 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define DEXTU(rt,rs,pos,size)		hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos-32,MIPS_DEXTU)
 #  define DEXTM(rt,rs,pos,size)		hrrrit(MIPS_SPECIAL3,rs,rt,size-32-1,pos,MIPS_DEXTM)
 #  define ROTR(rd,rt,sa)		hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL)
+#  define ROTRV(rd,rt,rs)		hrrrit(MIPS_SPECIAL,rs,rt,rd,1,MIPS_SRLV)
 #  define DROTR(rd,rt,sa)		hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL)
+#  define DROTR32(rd,rt,sa)		hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL32)
+#  define DROTRV(rd,rt,rs)		hrrrit(MIPS_SPECIAL,rs,rt,rd,1,MIPS_DSRLV)
 #  define SYNC()			rrr_t(_ZERO_REGNO,_ZERO_REGNO,_ZERO_REGNO,MIPS_SYNC)
 #  define MFHI(rd)			rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI)
 #  define MFLO(rd)			rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFLO)
@@ -620,6 +623,13 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #    define rshi_u(r0,r1,i0)		_rshi_u(_jit,r0,r1,i0)
 static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  endif
+#  define lrotr(r0,r1,r2)		_lrotr(_jit,r0,r1,r2)
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lroti(r0,r1,i0)		rroti(r0,r1,__WORDSIZE-(i0))
+#  define rrotr(r0,r1,r2)		_rrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define rroti(r0,r1,i0)		_rroti(_jit,r0,r1,i0)
+static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define andr(r0,r1,r2)		AND(r0,r1,r2)
 #  define andi(r0,r1,i0)		_andi(_jit,r0,r1,i0)
 static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -2198,6 +2208,76 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 }
 #endif
 
+/*
+ * Rotate r1 left by the bit count held in r2 and store the result in r0.
+ * When jit_mips2_p() is true the count is converted with rsbi (presumably
+ * __WORDSIZE - r2) and passed to rrotr; otherwise fallback_lrotr() is used.
+ */
+static void
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t		reg;
+    if (jit_mips2_p()) {
+	if (r0 != r1 && r0 != r2) {
+	    /* r0 aliases neither input, so it can hold the adjusted count. */
+	    rsbi(r0, r2, __WORDSIZE);
+	    rrotr(r0, r1, r0);
+	}
+	else {
+	    /* r0 aliases an input: build the count in a scratch register. */
+	    reg = jit_get_reg(jit_class_gpr);
+	    rsbi(rn(reg), r2, __WORDSIZE);
+	    rrotr(r0, r1, rn(reg));
+	    jit_unget_reg(reg);
+	}
+    }
+    else
+	fallback_lrotr(r0, r1, r2);
+}
+
+/*
+ * Rotate r1 right by the bit count held in r2 and store the result in r0.
+ * When jit_mips2_p() is true this emits ROTRV (32-bit words) or DROTRV
+ * (64-bit words); otherwise fallback_rrotr() is used.
+ */
+static void
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_mips2_p()) {
+#if __WORDSIZE == 32
+	ROTRV(r0, r1, r2);
+#else
+	DROTRV(r0, r1, r2);
+#endif
+    }
+    else
+	fallback_rrotr(r0, r1, r2);
+}
+
+/*
+ * Rotate r1 right by the immediate i0 and store the result in r0.
+ * i0 must be in [0, __WORDSIZE - 1].  NOTE(review): lroti(r0, r1, 0)
+ * expands to a count of __WORDSIZE and would trip the assert below.
+ */
+static void
+_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= __WORDSIZE - 1);
+    if (jit_mips2_p()) {
+#if __WORDSIZE == 32
+	ROTR(r0, r1, i0);
+#else
+	/* Counts of 32 or more go through DROTR32, which is passed i0 - 32. */
+	if (i0 < 32)
+	    DROTR(r0, r1, i0);
+	else
+	    DROTR32(r0, r1, i0 - 32);
+#endif
+    }
+    else
+	fallback_rroti(r0, r1, i0);
+}
+
 static void
 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-- 
cgit v1.2.3

AltStyle によって変換されたページ (->オリジナル) /