author | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2022年09月08日 16:48:43 -0300 |
---|---|---|
committer | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2022年09月08日 16:48:43 -0300 |
commit | d5a7c8e4ad719e84dbb4904c532f906a1ef5a77b (patch) | |
tree | 58f01a849950cef4f32cbfbf4fd036d17ece66d7 /lib | |
parent | e7a4ea8828be7c71140b5a0ca4f891d0053c64a5 (diff) | |
download | lightning-d5a7c8e4ad719e84dbb4904c532f906a1ef5a77b.tar.gz |
diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index 7d2a99d..7572be7 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -318,6 +318,8 @@ typedef union { # define A64_LDRSB 0x38e06800 # define A64_STR 0xf8206800 # define A64_LDR 0xf8606800 +# define A64_LDAXR 0xc85ffc00 +# define A64_STLXR 0xc800fc00 # define A64_STRH 0x78206800 # define A64_LDRH 0x78606800 # define A64_LDRSH 0x78a06800 @@ -445,6 +447,8 @@ typedef union { # define LDR(Rt,Rn,Rm) oxxx(A64_LDR,Rt,Rn,Rm) # define LDRI(Rt,Rn,Imm12) oxxi(A64_LDRI,Rt,Rn,Imm12) # define LDUR(Rt,Rn,Imm9) oxx9(A64_LDUR,Rt,Rn,Imm9) +# define LDAXR(Rt,Rn) o_xx(A64_LDAXR,Rt,Rn) +# define STLXR(Rs,Rt,Rn) oxxx(A64_STLXR,Rs,Rn,Rt) # define STRB(Rt,Rn,Rm) oxxx(A64_STRB,Rt,Rn,Rm) # define STRBI(Rt,Rn,Imm12) oxxi(A64_STRBI,Rt,Rn,Imm12) # define STURB(Rt,Rn,Imm9) oxx9(A64_STURB,Rt,Rn,Imm9) @@ -674,6 +678,11 @@ static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_us(r0,r1) UXTH(r0,r1) # define extr_i(r0,r1) SXTW(r0,r1) # define extr_ui(r0,r1) UXTW(r0,r1) +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define movr(r0,r1) _movr(_jit,r0,r1) static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); # define movi(r0,i0) _movi(_jit,r0,i0) @@ -1827,6 +1836,32 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + /* retry: */ + retry = _jit->pc.w; + LDAXR(r0, r1); + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ + STLXR(r0, r3, 
r1); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* done: (the compare-failure branch lands here so CSET still runs) */ + done = _jit->pc.w; + CSET(r0, CC_EQ); + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); +} + +static void _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (r0 != r1) diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c index e1f6d96..90c8774 100644 --- a/lib/jit_aarch64-sz.c +++ b/lib/jit_aarch64-sz.c @@ -404,4 +404,6 @@ 8, /* bswapr_us */ 8, /* bswapr_ui */ 4, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index f0be046..dadf76e 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -1137,6 +1137,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); case_rrr(movn,); case_rrr(movz,); diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index 2dd701d..3809aa3 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -315,6 +315,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEGQ(r1,r0) # define comr(r0,r1) NOT(r1,r0) # define addr(r0,r1,r2) ADDQ(r1,r2,r0) @@ -828,6 +833,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); 
+} + +static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c index ecfeba3..9653e35 100644 --- a/lib/jit_alpha-sz.c +++ b/lib/jit_alpha-sz.c @@ -404,4 +404,6 @@ 16, /* bswapr_us */ 36, /* bswapr_ui */ 36, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index d7bb3ec..1a78b90 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -64,6 +64,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_alpha-cpu.c" # include "jit_alpha-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1095,6 +1096,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1503,6 +1512,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_alpha-cpu.c" # include "jit_alpha-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index 14ba36b..91bb17c 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -36,6 +36,7 @@ # define jit_armv5_p() (jit_cpu.version >= 5) # define jit_armv5e_p() (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend)) # define jit_armv6_p() (jit_cpu.version >= 6) +# define jit_armv7_p() (jit_cpu.version >= 7) # define jit_armv7r_p() 0 # define stack_framesize 48 extern int __aeabi_idivmod(int, int); @@ -179,7 +180,23 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define ARM_XTR8 0x00000400 /* ?xt? rotate 8 bits */ # define ARM_XTR16 0x00000800 /* ?xt? rotate 16 bits */ # define ARM_XTR24 0x00000c00 /* ?xt? 
rotate 24 bits */ +# define ARM_LDREX 0x01900090 +# define THUMB2_LDREX 0xe8500000 +# define ARM_STREX 0x01800090 +# define THUMB2_STREX 0xe8400000 /* << ARMv6* */ +/* >> ARMv7 */ +# define ARM_DMB 0xf57ff050 +# define THUMB2_DMB 0xf3bf8f50 +# define DMB_SY 0xf +# define DMB_ST 0xe +# define DMB_ISH 0xb +# define DMB_ISHST 0xa +# define DMB_NSH 0x7 +# define DMB_NSHT 0x6 +# define DMB_OSH 0x3 +# define DMB_OSHST 0x2 +/* << ARMv7 */ # define ARM_SHIFT 0x01a00000 # define ARM_R 0x00000010 /* register shift */ # define ARM_LSL 0x00000000 @@ -399,6 +416,12 @@ static void _tcit(jit_state_t*,unsigned int,int); static void _tpp(jit_state_t*,int,int); # define torl(o,rn,im) _torl(_jit,o,rn,im) static void _torl(jit_state_t*,int,int,int) maybe_unused; +# define DMB(im) dmb(im) +# define T2_DMB(im) tdmb(im) +# define dmb(im) _dmb(_jit, im) +static void _dmb(jit_state_t *_jit, int im); +# define tdmb(im) _tdmb(_jit, im) +static void _tdmb(jit_state_t *_jit, int im); # define CC_MOV(cc,rd,rm) corrr(cc,ARM_MOV,0,rd,rm) # define MOV(rd,rm) CC_MOV(ARM_CC_AL,rd,rm) # define T1_MOV(rd,rm) is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7)) @@ -718,6 +741,9 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im) # define LDRDIN(rt,rn,im) CC_LDRDIN(ARM_CC_AL,rt,rn,im) # define T2_LDRDIN(rt,rt2,rn,im) torrri8(THUMB2_LDRDI,rn,rt,rt2,im) +# define CC_LDREX(cc,rt,rn) corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf) +# define LDREX(rt,rn) CC_LDREX(ARM_CC_AL,rt,rn) +# define T2_LDREX(rt,rn,im) torrri8(THUMB2_LDREX,rn,rt,0xf,im) # define CC_STRB(cc,rt,rn,rm) corrr(cc,ARM_STRB|ARM_P,rn,rt,rm) # define STRB(rt,rn,rm) CC_STRB(ARM_CC_AL,rt,rn,rm) # define T1_STRB(rt,rn,rm) is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) @@ -771,6 +797,9 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define CC_STRDIN(cc,rt,rn,im) corri8(cc,ARM_STRDI,rn,rt,im) # define STRDIN(rt,rn,im) CC_STRDIN(ARM_CC_AL,rt,rn,im) # define 
T2_STRDIN(rt,rt2,rn,im) torrri8(THUMB2_STRDI,rn,rt,rt2,im) +# define CC_STREX(cc,rd,rt,rn) corrrr(cc,ARM_STREX,rn,rd,0xf,rt) +# define STREX(rd,rt,rn) CC_STREX(ARM_CC_AL,rd,rt,rn) +# define T2_STREX(rd,rt,rn,im) torrri8(THUMB2_STREX,rn,rt,rd,im) # define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im) # define LDMIA(rn,im) CC_LDMIA(ARM_CC_AL,rn,im) # define CC_LDM(cc,rn,im) CC_LDMIA(cc,rn,im) @@ -847,6 +876,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0,r1) _comr(_jit,r0,r1) static void _comr(jit_state_t*,jit_int32_t,jit_int32_t); # define negr(r0,r1) _negr(_jit,r0,r1) @@ -1509,6 +1543,22 @@ _torl(jit_state_t *_jit, int o, int rn, int im) } static void +_dmb(jit_state_t *_jit, int im) +{ + assert(!(im & 0xfffffff0)); + ii(ARM_DMB|im); +} + +static void +_tdmb(jit_state_t *_jit, int im) +{ + jit_thumb_t thumb; + assert(!(im & 0xfffffff0)); + thumb.i = THUMB2_DMB | im; + iss(thumb.s[0], thumb.s[1]); +} + +static void _nop(jit_state_t *_jit, jit_int32_t i0) { if (jit_thumb_p()) { @@ -1611,6 +1661,55 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if (!jit_armv7_p()) + fallback_casx(r0, r1, r2, r3, i0); + else { + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + if 
(jit_thumb_p()) { + T2_DMB(DMB_ISH); + /* retry: */ + retry = _jit->pc.w; + T2_LDREX(r0, r1, 0); + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ + T2_STREX(r0, r3, r1, 0); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* done: */ + done = _jit->pc.w; + /* r0 = 0 if memory updated, 1 otherwise */ + xori(r0, r0, 1); + T2_DMB(DMB_ISH); + } + else { + DMB(DMB_ISH); + /* retry: */ + retry = _jit->pc.w; + LDREX(r0, r1); + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ + STREX(r0, r3, r1); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* done: */ + done = _jit->pc.w; + /* r0 = 0 if memory updated, 1 otherwise */ + xori(r0, r0, 1); + DMB(DMB_ISH); + } + patch_at(arm_patch_jump, jump0, done); + patch_at(arm_patch_jump, jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); + } +} + +static void _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_thumb_p()) { diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c index 293d306..7997009 100644 --- a/lib/jit_arm-sz.c +++ b/lib/jit_arm-sz.c @@ -405,6 +405,8 @@ 8, /* bswapr_us */ 4, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ @@ -814,5 +816,7 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 0fdd1a7..ae0e9f5 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -90,6 +90,7 @@ extern void __clear_cache(void *, void *); # include "jit_arm-cpu.c" # include "jit_arm-swf.c" # include "jit_arm-vfp.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1504,6 +1505,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); 
case_rrr(movn,); case_rrr(movz,); @@ -2003,6 +2012,7 @@ _emit_code(jit_state_t *_jit) # include "jit_arm-cpu.c" # include "jit_arm-swf.c" # include "jit_arm-vfp.c" +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_fallback.c b/lib/jit_fallback.c new file mode 100644 index 0000000..89a6295 --- /dev/null +++ b/lib/jit_fallback.c @@ -0,0 +1,178 @@ +#if PROTO +#define fallback_save(r0) _fallback_save(_jit, r0) +static void _fallback_save(jit_state_t*, jit_int32_t); +#define fallback_load(r0) _fallback_load(_jit, r0) +static void _fallback_load(jit_state_t*, jit_int32_t); +#define fallback_save_regs(r0) _fallback_save_regs(_jit, r0) +static void _fallback_save_regs(jit_state_t*, jit_int32_t); +#define fallback_load_regs(r0) _fallback_load_regs(_jit, r0) +static void _fallback_load_regs(jit_state_t*, jit_int32_t); +#define fallback_calli(i0, i1) _fallback_calli(_jit, i0, i1) +static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t); +#define fallback_casx(r0,r1,r2,r3,im) _fallback_casx(_jit,r0,r1,r2,r3,im) +static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t, + jit_int32_t, jit_int32_t, jit_word_t); +#endif + +#if CODE +static void +_fallback_save(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; offset++) { + spec = _rvs[offset].spec; + regno = jit_regno(spec); + if (regno == r0) { + if (!(spec & jit_class_sav)) + stxi(_jitc->function->regoff[offset], rn(JIT_FP), regno); + break; + } + } +} + +static void +_fallback_load(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; offset++) { + spec = _rvs[offset].spec; + regno = jit_regno(spec); + if (regno == r0) { + if (!(spec & jit_class_sav)) + ldxi(regno, rn(JIT_FP), _jitc->function->regoff[offset]); + break; + } + } +} + +static void +_fallback_save_regs(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < 
JIT_R_NUM; offset++) { + regno = JIT_R(offset); + spec = _rvs[regno].spec; + if ((spec & jit_class_gpr) && regno == r0) + continue; + if (!(spec & jit_class_sav)) { + if (!_jitc->function->regoff[regno]) { + _jitc->function->regoff[regno] = + jit_allocai(sizeof(jit_word_t)); + _jitc->again = 1; + } + jit_regset_setbit(&_jitc->regsav, regno); + emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno); + } + } + /* If knew for certain float registers are not used by + * pthread_mutex_lock and pthread_mutex_unlock, could skip this */ + for (offset = 0; offset < JIT_F_NUM; offset++) { + regno = JIT_F(offset); + spec = _rvs[regno].spec; + if (!(spec & jit_class_sav)) { + if (!_jitc->function->regoff[regno]) { + _jitc->function->regoff[regno] = + jit_allocai(sizeof(jit_float64_t)); + _jitc->again = 1; + } + jit_regset_setbit(&_jitc->regsav, regno); + emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno); + } + } +} + +static void +_fallback_load_regs(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; offset++) { + regno = JIT_R(offset); + spec = _rvs[regno].spec; + if ((spec & jit_class_gpr) && regno == r0) + continue; + if (!(spec & jit_class_sav)) { + jit_regset_clrbit(&_jitc->regsav, regno); + emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]); + } + } + /* If knew for certain float registers are not used by + * pthread_mutex_lock and pthread_mutex_unlock, could skip this */ + for (offset = 0; offset < JIT_F_NUM; offset++) { + regno = JIT_F(offset); + spec = _rvs[regno].spec; + if (!(spec & jit_class_sav)) { + jit_regset_clrbit(&_jitc->regsav, regno); + emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]); + } + } +} + +static void +_fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1) +{ +# if defined(__mips__) + movi(rn(_A0), i1); +# elif defined(__arm__) + movi(rn(_R0), i1); +# elif defined(__sparc__) + movi(rn(_O0), i1); +# elif defined(__ia64__) + /* avoid confusion with 
pushargi patching */ + if (i1 >= -2097152 && i1 <= 2097151) + MOVI(_jitc->rout, i1); + else + MOVL(_jitc->rout, i1); +# elif defined(__hppa__) + movi(_R26_REGNO, i1); +# elif defined(__s390__) || defined(__s390x__) + movi(rn(_R2), i1); +# elif defined(__alpha__) + movi(rn(_A0), i1); +# elif defined(__riscv) || defined(__riscv__) + movi(rn(JIT_RA0), i1); +# endif + calli(i0); +} + +static void +_fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t jump, done; + /* XXX only attempts to fallback cas for lightning jit code */ + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + if ((iscasi = r1 == _NOREG)) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + } + fallback_save_regs(r0); + if (iscasi) + movi(r1, i0); + fallback_calli((jit_word_t)pthread_mutex_lock, (jit_word_t)&mutex); + fallback_load(r1); + ldr(r0, r1); + fallback_load(r2); + eqr(r0, r0, r2); + fallback_save(r0); + jump = bnei(_jit->pc.w, r0, 1); + fallback_load(r3); +# if __WORDSIZE == 32 + str_i(r1, r3); +# else + str_l(r1, r3); +# endif + /* done: */ + done = _jit->pc.w; + fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex); + fallback_load(r0); +# if defined(__arm__) + patch_at(arm_patch_jump, jump, done); +# else + patch_at(jump, done); +# endif + fallback_load_regs(r0); + if (iscasi) + jit_unget_reg(r1_reg); +} +#endif diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index 6ca54f3..155ec91 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -652,6 +652,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + 
jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) #define comr(r0,r1) UADDCM(_R0_REGNO,r1,r0) #define negr(r0,r1) SUB(_R0_REGNO,r1,r0) #define extr_c(r0,r1) EXTRWR(r1,31,8,r0) @@ -1652,6 +1657,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + +static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c index 1bfb7e6..e984bac 100644 --- a/lib/jit_hppa-sz.c +++ b/lib/jit_hppa-sz.c @@ -404,4 +404,6 @@ 36, /* bswapr_us */ 80, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index 2668842..b994571 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -25,6 +25,7 @@ #define PROTO 1 # include "jit_hppa-cpu.c" # include "jit_hppa-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1028,6 +1029,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rrr(movn,); case_rrr(movz,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1459,6 +1468,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_hppa-cpu.c" # include "jit_hppa-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 63bb92d..b28e8f1 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -1311,6 +1311,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # 
define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); # define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) @@ -3500,6 +3505,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + +static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { bswapr_ul(r0, r1); diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c index c81b3ea..020349d 100644 --- a/lib/jit_ia64-sz.c +++ b/lib/jit_ia64-sz.c @@ -404,4 +404,6 @@ 48, /* bswapr_us */ 48, /* bswapr_ui */ 16, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index 8b4cd00..5664762 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -52,6 +52,7 @@ extern void __clear_cache(void *, void *); #define PROTO 1 # include "jit_ia64-cpu.c" # include "jit_ia64-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1175,6 +1176,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1693,6 +1702,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_ia64-cpu.c" # include "jit_ia64-fpu.c" +# include "jit_fallback.c" #undef CODE void diff 
--git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 4bd3dd1..0862592 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -522,6 +522,11 @@ static void _movi(jit_state_t*,jit_int32_t,jit_word_t); static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); # define movnr(r0,r1,r2) MOVN(r0, r1, r2) # define movzr(r0,r1,r2) MOVZ(r0, r1, r2) +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define ldr_c(r0,r1) LB(r0,0,r1) # define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); @@ -1329,6 +1334,13 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + +static void _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index b4642fa..25f0712 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -405,6 +405,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -814,6 +816,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -1222,4 +1226,6 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/lib/jit_mips.c b/lib/jit_mips.c index 25353a5..ecf025d 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -67,6 +67,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); # include "jit_rewind.c" # include "jit_mips-cpu.c" # include "jit_mips-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ 
-1432,6 +1433,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1874,6 +1883,7 @@ _emit_code(jit_state_t *_jit) # include "jit_rewind.c" # include "jit_mips-cpu.c" # include "jit_mips-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_names.c b/lib/jit_names.c index ebd3d56..664adff 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -230,4 +230,5 @@ static char *code_name[] = { "movr_d_w", "movi_d_w", "bswapr_us", "bswapr_ui", "bswapr_ul", + "casr", "casi", }; diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 60de8cf..24ea5b2 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -271,6 +271,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LSWI(d,a,n) FX(31,d,a,n,597) # define LSWX(d,a,b) FX(31,d,a,b,533) # define LWARX(d,a,b) FX(31,d,a,b,20) +# define LDARX(d,a,b) FX(31,d,a,b,84) # define LWBRX(d,a,b) FX(31,d,a,b,534) # define LWA(d,a,s) FDs(58,d,a,s|2) # define LWAUX(d,a,b) FX(31,d,a,b,373) @@ -446,6 +447,7 @@ static void _MCRXR(jit_state_t*, jit_int32_t); # define STW(s,a,d) FDs(36,s,a,d) # define STWBRX(s,a,b) FX(31,s,a,b,662) # define STWCX_(s,a,b) FX_(31,s,a,b,150) +# define STDCX_(s,a,b) FX_(31,s,a,b,214) # define STWU(s,a,d) FDs(37,s,a,d) # define STWUX(s,a,b) FX(31,s,a,b,183) # define STWX(s,a,b) FX(31,s,a,b,151) @@ -511,6 +513,11 @@ static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + 
jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) NOT(r0,r1) # define extr_c(r0,r1) EXTSB(r0,r1) @@ -1150,6 +1157,42 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + SYNC(); + /* retry: */ + retry = _jit->pc.w; +# if __WORDSIZE == 32 + LWARX(r0, _R0_REGNO, r1); +# else + LDARX(r0, _R0_REGNO, r1); +# endif + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ +# if __WORDSIZE == 32 + STWCX_(r3, _R0_REGNO, r1); +# else + STDCX_(r3, _R0_REGNO, r1); +# endif + jump1 = bnei(_jit->pc.w, r0, 0); /* bne retry r0 0 */ + /* done: */ + done = _jit->pc.w; + ISYNC(); + MFCR(r0); + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); +} + +static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag) { jit_int32_t reg, addr_reg; diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index 0be7047..9cd006c 100644 --- a/lib/jit_ppc-sz.c +++ b/lib/jit_ppc-sz.c @@ -406,6 +406,8 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* _CALL_SYV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -819,6 +821,8 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* _CALL_AIX */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ @@ -1231,6 +1235,8 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 44, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1642,6 +1648,8 @@ 20, /* bswapr_us */ 16, /* 
bswapr_ui */ 44, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 9512f94..b620d30 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -1369,6 +1369,14 @@ _emit_code(jit_state_t *_jit) # endif case_rr(neg,); case_rr(com,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); diff --git a/lib/jit_print.c b/lib/jit_print.c index 61d9650..23b34e3 100644 --- a/lib/jit_print.c +++ b/lib/jit_print.c @@ -107,7 +107,7 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) (jit_cc_a0_int|jit_cc_a0_flt|jit_cc_a0_dbl|jit_cc_a0_jmp| jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_arg| jit_cc_a1_reg|jit_cc_a1_int|jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg| - jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl); + jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl|jit_cc_a2_rlh); if (!(node->flag & jit_flag_synth) && ((value & jit_cc_a0_jmp) || node->code == jit_code_finishr || node->code == jit_code_finishi)) @@ -217,6 +217,18 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) print_chr(' '); print_reg(node->u.q.h); print_str(") "); print_reg(node->v.w); print_chr(' '); print_hex(node->w.w); return; + r_r_q: + print_chr(' '); print_reg(node->u.w); + print_chr(' '); print_reg(node->v.w); + print_str(" ("); print_reg(node->w.q.l); + print_chr(' '); print_reg(node->w.q.h); + print_str(") "); return; + r_w_q: + print_chr(' '); print_reg(node->u.w); + print_chr(' '); print_hex(node->v.w); + print_str(" ("); print_reg(node->w.q.l); + print_chr(' '); print_reg(node->w.q.h); + print_str(") "); return; r_r_f: print_chr(' '); print_reg(node->u.w); print_chr(' '); print_reg(node->v.w); @@ -357,6 +369,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) case 
jit_cc_a0_reg|jit_cc_a0_rlh| jit_cc_a1_reg|jit_cc_a2_int: goto q_r_w; + case jit_cc_a0_reg|jit_cc_a1_reg| + jit_cc_a2_reg|jit_cc_a2_rlh: + goto r_r_q; + case jit_cc_a0_reg|jit_cc_a1_int| + jit_cc_a2_reg|jit_cc_a2_rlh: + goto r_w_q; case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_flt: goto r_r_f; case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_dbl: diff --git a/lib/jit_riscv-cpu.c b/lib/jit_riscv-cpu.c index 9f029c0..5046fac 100644 --- a/lib/jit_riscv-cpu.c +++ b/lib/jit_riscv-cpu.c @@ -456,6 +456,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define ltr(r0, r1, r2) SLT(r0, r1, r2) # define lti(r0, r1, im) _lti(_jit, r0, r1, im) static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1340,6 +1345,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + +static void _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { if (simm12_p(i0)) diff --git a/lib/jit_riscv-sz.c b/lib/jit_riscv-sz.c index c8908d8..ea2911f 100644 --- a/lib/jit_riscv-sz.c +++ b/lib/jit_riscv-sz.c @@ -403,4 +403,6 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c index 1dc3c9e..966604a 100644 --- a/lib/jit_riscv.c +++ b/lib/jit_riscv.c @@ -34,6 +34,7 @@ static void 
_patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1134,6 +1135,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1558,6 +1567,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c index 619ab15..2c10787 100644 --- a/lib/jit_s390-cpu.c +++ b/lib/jit_s390-cpu.c @@ -973,6 +973,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -2469,6 +2474,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + +static void _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { if (r0 == r2) diff --git a/lib/jit_s390-sz.c b/lib/jit_s390-sz.c index bb9071d..cea2d44 100644 --- 
a/lib/jit_s390-sz.c +++ b/lib/jit_s390-sz.c @@ -404,6 +404,8 @@ 52, /* bswapr_us */ 128, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -811,4 +813,6 @@ 68, /* bswapr_us */ 160, /* bswapr_ui */ 344, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/lib/jit_s390.c b/lib/jit_s390.c index 4b89bea..ef0c899 100644 --- a/lib/jit_s390.c +++ b/lib/jit_s390.c @@ -88,6 +88,7 @@ extern void __clear_cache(void *, void *); #define PROTO 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1165,6 +1166,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1558,6 +1567,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 90c3767..ecea506 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -552,6 +552,11 @@ static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0, r1) XNOR(r1, 0, r0) # define negr(r0, r1) NEG(r1, r0) # define addr(r0, r1, r2) ADD(r1, r2, r0) @@ -1234,6 +1239,13 @@ _movzr(jit_state_t 
*_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + +static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index 5ec051d..5e7ef95 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -403,6 +403,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -810,4 +812,6 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index 23d4442..a677998 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -40,6 +40,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_sparc-cpu.c" # include "jit_sparc-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1477,6 +1478,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1875,6 +1884,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_sparc-cpu.c" # include "jit_sparc-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 81534f0..0d8affe 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -369,6 +369,11 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t); static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t); # define movsr_u(r0, r1) _movsr_u(_jit, r0, r1) static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) 
_casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) #define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2) static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); #define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2) @@ -2219,6 +2224,66 @@ _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t save_rax, restore_rax; + jit_int32_t ascasr_reg, ascasr_use; + if (r0 != _RAX_REGNO) { /* result not in %rax */ + if (r2 != _RAX_REGNO) { /* old value not in %rax */ + save_rax = jit_get_reg(jit_class_gpr); + movr(rn(save_rax), _RAX_REGNO); + restore_rax = 1; + } + else + restore_rax = 0; + } + else + restore_rax = 0; + if (r2 != _RAX_REGNO) + movr(_RAX_REGNO, r2); + if (r1 == _NOREG) { /* using immediate address */ + if (!can_sign_extend_int_p(i0)) { + ascasr_reg = jit_get_reg(jit_class_gpr); + if (ascasr_reg == _RAX) { + ascasr_reg = jit_get_reg(jit_class_gpr); + jit_unget_reg(_RAX); + } + ascasr_use = 1; + movi(rn(ascasr_reg), i0); + } + else + ascasr_use = 0; + } + else + ascasr_use = 0; + ic(0xf0); /* lock */ + if (ascasr_use) + rex(0, WIDE, r3, _NOREG, rn(ascasr_reg)); + else + rex(0, WIDE, r3, _NOREG, r1); + ic(0x0f); + ic(0xb1); + if (r1 != _NOREG) /* casr */ + rx(r3, 0, r1, _NOREG, _SCL1); + else { /* casi */ + if (ascasr_use) + rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1); /* address in reg */ + else + rx(r3, i0, _NOREG, _NOREG, _SCL1); /* address in offset */ + } + cc(X86_CC_E, r0); + if (r0 != _RAX_REGNO) + movr(r0, _RAX_REGNO); + if (restore_rax) { + movr(_RAX_REGNO, rn(save_rax)); + jit_unget_reg(save_rax); + } + if (ascasr_use) + jit_unget_reg(ascasr_reg); +} + +static void _movnr(jit_state_t *_jit, jit_int32_t r0, 
jit_int32_t r1, jit_int32_t r2) { assert(jit_cmov_p()); diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index bd4b9a0..ff7548a 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -404,6 +404,8 @@ 7, /* bswapr_us */ 4, /* bswapr_ui */ 0, /* bswapr_ul */ + 9, /* casr */ + 0, /* casi */ #endif #if __X64 @@ -812,6 +814,8 @@ 9, /* bswapr_us */ 6, /* bswapr_ui */ 6, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #else # if __X64_32 @@ -1219,6 +1223,8 @@ 9, /* bswapr_us */ 6, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ # else #define JIT_INSTR_MAX 115 @@ -1625,6 +1631,8 @@ 9, /* bswapr_us */ 6, /* bswapr_ui */ 6, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/lib/jit_x86.c b/lib/jit_x86.c index e3e1383..fb0b06b 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -1674,6 +1674,14 @@ _emit_code(jit_state_t *_jit) case_rrw(gt, _u); case_rrr(ne,); case_rrw(ne,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); diff --git a/lib/lightning.c b/lib/lightning.c index 51456be..d219e6d 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1139,6 +1139,20 @@ _jit_new_node_qww(jit_state_t *_jit, jit_code_t code, } jit_node_t * +_jit_new_node_wwq(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_word_t v, + jit_int32_t l, jit_int32_t h) +{ + jit_node_t *node = new_node(code); + assert(!_jitc->realize); + node->u.w = u; + node->v.w = v; + node->w.q.l = l; + node->w.q.h = h; + return (link_node(node)); +} + +jit_node_t * _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code, jit_word_t u, jit_word_t v, jit_float32_t w) { @@ -1539,6 +1553,14 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_movnr: case jit_code_movzr: mask = 
jit_cc_a0_reg|jit_cc_a0_cnd|jit_cc_a1_reg|jit_cc_a2_reg; break; + case jit_code_casr: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg| + jit_cc_a2_reg|jit_cc_a2_rlh; + break; + case jit_code_casi: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int| + jit_cc_a2_reg|jit_cc_a2_rlh; + break; default: abort(); } @@ -1806,13 +1828,24 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) else jit_regset_setbit(&_jitc->reglive, node->v.w); } - if ((value & jit_cc_a2_reg) && !(node->w.w & jit_regno_patch)) { - if (value & jit_cc_a2_chg) { - jit_regset_clrbit(&_jitc->reglive, node->w.w); - jit_regset_setbit(&_jitc->regmask, node->w.w); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + /* Assume registers are not changed */ + if (!(node->w.q.l & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->w.q.l); + if (!(node->w.q.h & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->w.q.h); + } + else { + if (!(node->w.w & jit_regno_patch)) { + if (value & jit_cc_a2_chg) { + jit_regset_clrbit(&_jitc->reglive, node->w.w); + jit_regset_setbit(&_jitc->regmask, node->w.w); + } + else + jit_regset_setbit(&_jitc->reglive, node->w.w); + } } - else - jit_regset_setbit(&_jitc->reglive, node->w.w); } if (jit_regset_set_p(&_jitc->regmask)) { jit_update(node->next, &_jitc->reglive, &_jitc->regmask); @@ -1843,8 +1876,14 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) } if (value & jit_cc_a1_reg) jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w)); - if (value & jit_cc_a2_reg) - jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w)); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.l)); + jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.h)); + } + else + jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w)); + } } void @@ -1863,8 +1902,14 @@ _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) } if (value & jit_cc_a1_reg) 
jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w)); - if (value & jit_cc_a2_reg) - jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w)); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l)); + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.h)); + } + else + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w)); + } } void @@ -2302,11 +2347,26 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) default: value = jit_classify(node->code); if (value & jit_cc_a2_reg) { - if (!(node->w.w & jit_regno_patch)) { - if (jit_regset_tstbit(&regmask, node->w.w)) { - jit_regset_clrbit(&regmask, node->w.w); - if (!(value & jit_cc_a2_chg)) - jit_regset_setbit(&reglive, node->w.w); + if (value & jit_cc_a2_rlh) { + if (!(node->w.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(&regmask, node->w.q.l)) + jit_regset_clrbit(&regmask, node->w.q.l); + } + if (!(node->w.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(&regmask, node->w.q.h)) + jit_regset_clrbit(&regmask, node->w.q.h); + } + } + else { + if (value & jit_cc_a2_reg) { + if (!(node->w.w & jit_regno_patch)) { + if (jit_regset_tstbit(&regmask, node->w.w)) { + jit_regset_clrbit(&regmask, node->w.w); + if (!(value & jit_cc_a2_chg)) + jit_regset_setbit(&reglive, node->w.w); + } + } } } } @@ -2453,11 +2513,24 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, default: value = jit_classify(node->code); if (value & jit_cc_a2_reg) { - if (!(node->w.w & jit_regno_patch)) { - if (jit_regset_tstbit(mask, node->w.w)) { - jit_regset_clrbit(mask, node->w.w); - if (!(value & jit_cc_a2_chg)) - jit_regset_setbit(live, node->w.w); + if (value & jit_cc_a2_rlh) { + if (!(node->w.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(mask, node->w.q.l)) + jit_regset_clrbit(mask, node->w.q.l); + } + if (!(node->w.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->w.q.h)) +
jit_regset_clrbit(mask, node->w.q.h); + } + } + else { + if (!(node->w.w & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->w.w)) { + jit_regset_clrbit(mask, node->w.w); + if (!(value & jit_cc_a2_chg)) + jit_regset_setbit(live, node->w.w); + } } } } @@ -3298,9 +3371,24 @@ _simplify(jit_state_t *_jit) ++_jitc->gen[regno]; } if (info & jit_cc_a2_chg) { - regno = jit_regno(node->w.w); - _jitc->values[regno].kind = 0; - ++_jitc->gen[regno]; +#if 0 + /* Assume registers are not changed */ + if (info & jit_cc_a2_rlh) { + regno = jit_regno(node->w.q.l); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + regno = jit_regno(node->w.q.h); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + } + else { +#endif + regno = jit_regno(node->w.w); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; +#if 0 + } +#endif } break; } @@ -3505,8 +3593,18 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, } if ((value & jit_cc_a1_reg) && node->v.w == regno) node->v.w = patch; - if ((value & jit_cc_a2_reg) && node->w.w == regno) - node->w.w = patch; + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + if (node->w.q.l == regno) + node->w.q.l = patch; + if (node->w.q.h == regno) + node->w.q.h = patch; + } + else { + if (node->w.w == regno) + node->w.w = patch; + } + } } } |