-rw-r--r-- | TODO | 6 | ||||
-rw-r--r-- | include/lightning/jit_arm.h | 6 | ||||
-rw-r--r-- | lib/jit_arm-cpu.c | 520 | ||||
-rw-r--r-- | lib/jit_arm-sz.c | 32 | ||||
-rw-r--r-- | lib/jit_arm-vfp.c | 24 | ||||
-rw-r--r-- | lib/jit_arm.c | 14 |
@@ -1 +1,5 @@ -o Use SIMD instructions for load/store in aarch64 +o Use PC relative load/store in aarch64 +o Check post-index in real arm hardware +o Implement valid encodings for vfp pre/post index in arm hardware +o Implement valid encodings for pre/post index in arm hardware with register + increment diff --git a/include/lightning/jit_arm.h b/include/lightning/jit_arm.h index 558f553..0f73166 100644 --- a/include/lightning/jit_arm.h +++ b/include/lightning/jit_arm.h @@ -29,6 +29,7 @@ #define jit_swf_p() (jit_cpu.vfp == 0) #define jit_hardfp_p() jit_cpu.abi #define jit_ldrt_strt_p() jit_cpu.ldrt_strt +#define jit_post_index_p() jit_cpu.post_index #define JIT_FP _R11 typedef enum { @@ -125,6 +126,11 @@ typedef struct { * is in arm mode, or the reverse, what may cause a crash upon return * of that function if generating jit for a relative jump. */ + /* Nonzero enables post-index load/store encodings (jit_post_index_p). + * Apparently a bug in qemu 8.1.3, and possibly other versions, treats + * ldrT Rt, [Rn, #+/-<immN>]! and ldrT Rt, [Rn], #+/-<immN> identically, + * as pre-index, but the second one should adjust Rn after the load */ + jit_uint32_t post_index : 1; jit_uint32_t exchange : 1; /* By default assume cannot load unaligned data. 
* A3.2.1 diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index 813b01e..6fc3fea 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -270,6 +270,8 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define THUMB2_BLI 0xf000d000 /* ldr/str */ # define ARM_U 0x00800000 /* positive offset */ +# define ARM_P 0x01000000 /* index */ +# define ARM_W 0x00200000 /* writeback */ # define THUMB2_P 0x00000400 # define THUMB2_U 0x00000200 # define THUMB2_W 0x00000100 @@ -338,9 +340,9 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); /* ldm/stm */ # define ARM_M 0x08000000 # define ARM_M_L 0x00100000 /* load; store if not set */ -# define ARM_M_I 0x00800000 /* inc; dec if not set */ -# define ARM_M_B 0x01000000 /* before; after if not set */ -# define ARM_M_U 0x00200000 /* update Rn */ +# define ARM_M_U 0x00800000 /* inc; dec if not set */ +# define ARM_M_P 0x01000000 /* before; after if not set */ +# define ARM_M_W 0x00200000 /* update Rn */ # define THUMB2_LDM_W 0x00200000 # define THUMB2_LDM_P 0x00008000 # define THUMB2_LDM_M 0x00004000 @@ -723,69 +725,134 @@ static void _corrlw(jit_state_t*,int,int,int,int,int,int); # define T2_LDRSB(rt,rn,rm) torxr(THUMB2_LDRSB,rn,rt,rm) # define CC_LDRSBN(cc,rt,rn,rm) corrr(cc,ARM_LDRSB,rn,rt,rm) # define LDRSBN(rt,rn,rm) CC_LDRSBN(ARM_CC_AL,rt,rn,rm) + # define CC_LDRSBI(cc,rt,rn,im) corri8(cc,ARM_LDRSBI|ARM_U,rn,rt,im) # define LDRSBI(rt,rn,im) CC_LDRSBI(ARM_CC_AL,rt,rn,im) +# define LDRSBI_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSBI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRSBI_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSBI|ARM_U|ARM_W,rn,rt,im) + # define T2_LDRSBI(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_U,rn,rt,im) # define T2_LDRSBWI(rt,rn,im) torri12(THUMB2_LDRSBWI,rn,rt,im) +# define T2_LDRSBI_B(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRSBI_A(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_U|THUMB2_W,rn,rt,im) + # define CC_LDRSBIN(cc,rt,rn,im) corri8(cc,ARM_LDRSBI,rn,rt,im) # define 
LDRSBIN(rt,rn,im) CC_LDRSBIN(ARM_CC_AL,rt,rn,im) +# define LDRSBIN_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSBI|ARM_P|ARM_W,rn,rt,im) +# define LDRSBIN_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSBI|ARM_W,rn,rt,im) + # define T2_LDRSBIN(rt,rn,im) torri8(THUMB2_LDRSBI,rn,rt,im) +# define T2_LDRSBIN_B(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRSBIN_A(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_W,rn,rt,im) + # define CC_LDRB(cc,rt,rn,rm) corrr(cc,ARM_LDRB|ARM_U,rn,rt,rm) # define LDRB(rt,rn,rm) CC_LDRB(ARM_CC_AL,rt,rn,rm) # define T1_LDRB(rt,rn,rm) is(THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRB(rt,rn,rm) torxr(THUMB2_LDRB,rn,rt,rm) # define CC_LDRBN(cc,rt,rn,rm) corrr(cc,ARM_LDRB,rn,rt,rm) # define LDRBN(rt,rn,rm) CC_LDRBN(ARM_CC_AL,rt,rn,rm) + # define CC_LDRBI(cc,rt,rn,im) corri(cc,ARM_LDRBI|ARM_U,rn,rt,im) # define LDRBI(rt,rn,im) CC_LDRBI(ARM_CC_AL,rt,rn,im) +# define LDRBI_B(rt,rn,im) corri(ARM_CC_AL,ARM_LDRBI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRBI_A(rt,rn,im) corri(ARM_CC_AL,ARM_LDRBI|ARM_U|ARM_W,rn,rt,im) + # define T1_LDRBI(rt,rn,im) is(THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRBI(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_U,rn,rt,im) # define T2_LDRBWI(rt,rn,im) torri12(THUMB2_LDRBWI,rn,rt,im) +# define T2_LDRBI_B(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRBI_A(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_U|THUMB2_W,rn,rt,im) + # define CC_LDRBIN(cc,rt,rn,im) corri(cc,ARM_LDRBI,rn,rt,im) # define LDRBIN(rt,rn,im) CC_LDRBIN(ARM_CC_AL,rt,rn,im) +# define LDRBIN_B(rt,rn,im) corri(ARM_CC_AL,ARM_LDRBI|ARM_P|ARM_W,rn,rt,im) +# define LDRBIN_A(rt,rn,im) corri(ARM_CC_AL,ARM_LDRBI|ARM_W,rn,rt,im) + # define T2_LDRBIN(rt,rn,im) torri8(THUMB2_LDRBI,rn,rt,im) +# define T2_LDRBIN_B(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRBIN_A(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_W,rn,rt,im) + # define CC_LDRSH(cc,rt,rn,rm) corrr(cc,ARM_LDRSH|ARM_U,rn,rt,rm) # define 
LDRSH(rt,rn,rm) CC_LDRSH(ARM_CC_AL,rt,rn,rm) # define T1_LDRSH(rt,rn,rm) is(THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRSH(rt,rn,rm) torxr(THUMB2_LDRSH,rn,rt,rm) # define CC_LDRSHN(cc,rt,rn,rm) corrr(cc,ARM_LDRSH,rn,rt,rm) # define LDRSHN(rt,rn,rm) CC_LDRSHN(ARM_CC_AL,rt,rn,rm) + # define CC_LDRSHI(cc,rt,rn,im) corri8(cc,ARM_LDRSHI|ARM_U,rn,rt,im) # define LDRSHI(rt,rn,im) CC_LDRSHI(ARM_CC_AL,rt,rn,im) +# define LDRSHI_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSHI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRSHI_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSHI|ARM_U|ARM_W,rn,rt,im) + # define T2_LDRSHI(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_U,rn,rt,im) # define T2_LDRSHWI(rt,rn,im) torri12(THUMB2_LDRSHWI,rn,rt,im) +# define T2_LDRSHI_B(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRSHI_A(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_U|THUMB2_W,rn,rt,im) + # define CC_LDRSHIN(cc,rt,rn,im) corri8(cc,ARM_LDRSHI,rn,rt,im) # define LDRSHIN(rt,rn,im) CC_LDRSHIN(ARM_CC_AL,rt,rn,im) +# define LDRSHIN_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSHI|ARM_P|ARM_W,rn,rt,im) +# define LDRSHIN_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSHI|ARM_W,rn,rt,im) + # define T2_LDRSHIN(rt,rn,im) torri8(THUMB2_LDRSHI,rn,rt,im) +# define T2_LDRSHIN_B(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRSHIN_A(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_W,rn,rt,im) + # define CC_LDRH(cc,rt,rn,rm) corrr(cc,ARM_LDRH|ARM_U,rn,rt,rm) # define LDRH(rt,rn,rm) CC_LDRH(ARM_CC_AL,rt,rn,rm) # define T1_LDRH(rt,rn,rm) is(THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRH(rt,rn,rm) torxr(THUMB2_LDRH,rn,rt,rm) # define CC_LDRHN(cc,rt,rn,rm) corrr(cc,ARM_LDRH,rn,rt,rm) # define LDRHN(rt,rn,rm) CC_LDRHN(ARM_CC_AL,rt,rn,rm) + # define CC_LDRHI(cc,rt,rn,im) corri8(cc,ARM_LDRHI|ARM_U,rn,rt,im) # define LDRHI(rt,rn,im) CC_LDRHI(ARM_CC_AL,rt,rn,im) +# define LDRHI_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRHI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRHI_A(rt,rn,im) 
corri8(ARM_CC_AL,ARM_LDRHI|ARM_U|ARM_W,rn,rt,im) + # define T1_LDRHI(rt,rn,im) is(THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRHI(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_U,rn,rt,im) # define T2_LDRHWI(rt,rn,im) torri12(THUMB2_LDRHWI,rn,rt,im) +# define T2_LDRHI_B(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRHI_A(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_U|THUMB2_W,rn,rt,im) + # define CC_LDRHIN(cc,rt,rn,im) corri8(cc,ARM_LDRHI,rn,rt,im) # define LDRHIN(rt,rn,im) CC_LDRHIN(ARM_CC_AL,rt,rn,im) +# define LDRHIN_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRHI|ARM_P|ARM_W,rn,rt,im) +# define LDRHIN_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRHI|ARM_W,rn,rt,im) + # define T2_LDRHIN(rt,rn,im) torri8(THUMB2_LDRHI,rn,rt,im) +# define T2_LDRHIN_B(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRHIN_A(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_W,rn,rt,im) + # define CC_LDR(cc,rt,rn,rm) corrr(cc,ARM_LDR|ARM_U,rn,rt,rm) # define LDR(rt,rn,rm) CC_LDR(ARM_CC_AL,rt,rn,rm) # define T1_LDR(rt,rn,rm) is(THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDR(rt,rn,rm) torxr(THUMB2_LDR,rn,rt,rm) # define CC_LDRN(cc,rt,rn,rm) corrr(cc,ARM_LDR,rn,rt,rm) # define LDRN(rt,rn,rm) CC_LDRN(ARM_CC_AL,rt,rn,rm) + # define CC_LDRI(cc,rt,rn,im) corri(cc,ARM_LDRI|ARM_U,rn,rt,im) # define LDRI(rt,rn,im) CC_LDRI(ARM_CC_AL,rt,rn,im) +# define LDRI_B(rt,rn,im) corri(ARM_CC_AL,ARM_LDRI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRI_A(rt,rn,im) corri(ARM_CC_AL,ARM_LDRI|ARM_U|ARM_W,rn,rt,im) + # define T1_LDRI(rt,rn,im) is(THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T1_LDRISP(rt,im) is(THUMB_LDRISP|(_u3(rt)<<8)|_u8(im)) # define T2_LDRI(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_U,rn,rt,im) # define T2_LDRWI(rt,rn,im) torri12(THUMB2_LDRWI,rn,rt,im) +# define T2_LDRI_B(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRI_A(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_U|THUMB2_W,rn,rt,im) + # define CC_LDRIN(cc,rt,rn,im) 
corri(cc,ARM_LDRI,rn,rt,im) # define LDRIN(rt,rn,im) CC_LDRIN(ARM_CC_AL,rt,rn,im) +# define LDRIN_B(rt,rn,im) corri(ARM_CC_AL,ARM_LDRI|ARM_P|ARM_W,rn,rt,im) +# define LDRIN_A(rt,rn,im) corri(ARM_CC_AL,ARM_LDRI|ARM_W,rn,rt,im) + # define T2_LDRIN(rt,rn,im) torri8(THUMB2_LDRI,rn,rt,im) +# define T2_LDRIN_B(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRIN_A(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_W,rn,rt,im) + # define CC_LDRD(cc,rt,rn,rm) corrr(cc,ARM_LDRD|ARM_U,rn,rt,rm) # define LDRD(rt,rn,rm) CC_LDRD(ARM_CC_AL,rt,rn,rm) # define T2_LDRDI(rt,rt2,rn,im) torrri8(THUMB2_LDRDI|ARM_U,rn,rt,rt2,im) @@ -805,43 +872,84 @@ static void _corrlw(jit_state_t*,int,int,int,int,int,int); # define T2_STRB(rt,rn,rm) torxr(THUMB2_STRB,rn,rt,rm) # define CC_STRBN(cc,rt,rn,rm) corrr(cc,ARM_STRB,rn,rt,rm) # define STRBN(rt,rn,rm) CC_STRBN(ARM_CC_AL,rt,rn,rm) + # define CC_STRBI(cc,rt,rn,im) corri(cc,ARM_STRBI|ARM_U,rn,rt,im) # define STRBI(rt,rn,im) CC_STRBI(ARM_CC_AL,rt,rn,im) +# define STRBI_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define STRBI_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_U|ARM_W,rn,rt,im) + # define T1_STRBI(rt,rn,im) is(THUMB_STRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STRBI(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_U,rn,rt,im) # define T2_STRBWI(rt,rn,im) torri12(THUMB2_STRBWI,rn,rt,im) +# define T2_STRBI_B(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRBI_A(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_U|THUMB2_W,rn,rt,im) + # define CC_STRBIN(cc,rt,rn,im) corri(cc,ARM_STRBI,rn,rt,im) # define STRBIN(rt,rn,im) CC_STRBIN(ARM_CC_AL,rt,rn,im) +# define STRBIN_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_P|ARM_W,rn,rt,im) +# define STRBIN_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_W,rn,rt,im) + # define T2_STRBIN(rt,rn,im) torri8(THUMB2_STRBI,rn,rt,im) +# define T2_STRBIN_B(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRBIN_A(rt,rn,im) 
torri8(THUMB2_STRBI|THUMB2_W,rn,rt,im) + # define CC_STRH(cc,rt,rn,rm) corrr(cc,ARM_STRH|ARM_U,rn,rt,rm) # define STRH(rt,rn,rm) CC_STRH(ARM_CC_AL,rt,rn,rm) +# define STRBI_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define STRBI_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_U|ARM_W,rn,rt,im) # define T1_STRH(rt,rn,rm) is(THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STRH(rt,rn,rm) torxr(THUMB2_STRH,rn,rt,rm) # define CC_STRHN(cc,rt,rn,rm) corrr(cc,ARM_STRH,rn,rt,rm) # define STRHN(rt,rn,rm) CC_STRHN(ARM_CC_AL,rt,rn,rm) + # define CC_STRHI(cc,rt,rn,im) corri8(cc,ARM_STRHI|ARM_U,rn,rt,im) # define STRHI(rt,rn,im) CC_STRHI(ARM_CC_AL,rt,rn,im) +# define STRHI_B(rt,rn,im) corri8(ARM_CC_AL,ARM_STRHI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define STRHI_A(rt,rn,im) corri8(ARM_CC_AL,ARM_STRHI|ARM_U|ARM_W,rn,rt,im) + # define T1_STRHI(rt,rn,im) is(THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STRHI(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_U,rn,rt,im) # define T2_STRHWI(rt,rn,im) torri12(THUMB2_STRHWI,rn,rt,im) +# define T2_STRHI_B(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRHI_A(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_U|THUMB2_W,rn,rt,im) + # define CC_STRHIN(cc,rt,rn,im) corri8(cc,ARM_STRHI,rn,rt,im) # define STRHIN(rt,rn,im) CC_STRHIN(ARM_CC_AL,rt,rn,im) +# define STRHIN_B(rt,rn,im) corri8(ARM_CC_AL,ARM_STRHI|ARM_P|ARM_W,rn,rt,im) +# define STRHIN_A(rt,rn,im) corri8(ARM_CC_AL,ARM_STRHI|ARM_W,rn,rt,im) + # define T2_STRHIN(rt,rn,im) torri8(THUMB2_STRHI,rn,rt,im) +# define T2_STRHIN_B(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRHIN_A(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_W,rn,rt,im) + # define CC_STR(cc,rt,rn,rm) corrr(cc,ARM_STR|ARM_U,rn,rt,rm) # define STR(rt,rn,rm) CC_STR(ARM_CC_AL,rt,rn,rm) # define T1_STR(rt,rn,rm) is(THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STR(rt,rn,rm) torxr(THUMB2_STR,rn,rt,rm) # define CC_STRN(cc,rt,rn,rm) corrr(cc,ARM_STR,rn,rt,rm) # 
define STRN(rt,rn,rm) CC_STRN(ARM_CC_AL,rt,rn,rm) + # define CC_STRI(cc,rt,rn,im) corri(cc,ARM_STRI|ARM_U,rn,rt,im) # define STRI(rt,rn,im) CC_STRI(ARM_CC_AL,rt,rn,im) +# define STRI_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define STRI_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRI|ARM_U|ARM_W,rn,rt,im) + # define T1_STRI(rt,rn,im) is(THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T1_STRISP(rt,im) is(THUMB_STRISP|(_u3(rt)<<8)|(_u8(im))) # define T2_STRI(rt,rn,im) torri8(THUMB2_STRI|THUMB2_U,rn,rt,im) # define T2_STRWI(rt,rn,im) torri12(THUMB2_STRWI,rn,rt,im) +# define T2_STRI_B(rt,rn,im) torri8(THUMB2_STRI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRI_A(rt,rn,im) torri8(THUMB2_STRI|THUMB2_U|THUMB2_W,rn,rt,im) + # define CC_STRIN(cc,rt,rn,im) corri(cc,ARM_STRI,rn,rt,im) # define STRIN(rt,rn,im) CC_STRIN(ARM_CC_AL,rt,rn,im) +# define STRIN_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRI|ARM_P|ARM_W,rn,rt,im) +# define STRIN_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRI|ARM_W,rn,rt,im) + # define T2_STRIN(rt,rn,im) torri8(THUMB2_STRI,rn,rt,im) +# define T2_STRIN_B(rt,rn,im) torri8(THUMB2_STRI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRIN_A(rt,rn,im) torri8(THUMB2_STRI|THUMB2_W,rn,rt,im) + # define CC_STRD(cc,rt,rn,rm) corrr(cc,ARM_STRD|ARM_U,rn,rt,rm) # define STRD(rt,rn,rm) CC_STRD(ARM_CC_AL,rt,rn,rm) # define CC_STRDN(cc,rt,rn,rm) corrr(cc,ARM_STRD,rn,rt,rm) @@ -855,47 +963,47 @@ static void _corrlw(jit_state_t*,int,int,int,int,int,int); # define CC_STREX(cc,rd,rt,rn) corrrr(cc,ARM_STREX,rn,rd,0xf,rt) # define STREX(rd,rt,rn) CC_STREX(ARM_CC_AL,rd,rt,rn) # define T2_STREX(rd,rt,rn,im) torrri8(THUMB2_STREX,rn,rt,rd,im) -# define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im) +# define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im) # define LDMIA(rn,im) CC_LDMIA(ARM_CC_AL,rn,im) # define CC_LDM(cc,rn,im) CC_LDMIA(cc,rn,im) # define LDM(rn,im) LDMIA(rn,im) # define T1_LDMIA(rn,im) is(THUMB_LDMIA|(_u3(rn)<<8)|im) # define T2_LDMIA(rn,im) 
torl(THUMB2_LDMIA,rn,im) -# define CC_LDMIA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_U,rn,im) +# define CC_LDMIA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_W,rn,im) # define LDMIA_U(rn,im) CC_LDMIA_U(ARM_CC_AL,rn,im) # define LDM_U(r0,i0) LDMIA_U(r0,i0) -# define CC_LDMIB(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B,rn,im) +# define CC_LDMIB(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_P,rn,im) # define LDMIB(rn,im) CC_LDMIB(ARM_CC_AL,rn,im) -# define CC_LDMIB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B|ARM_M_U,rn,im) +# define CC_LDMIB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_P|ARM_M_W,rn,im) # define LDMIB_U(rn,im) CC_LDMIB_U(ARM_CC_AL,rn,im) # define CC_LDMDA(cc,rn,im) corl(cc,ARM_M|ARM_M_L,rn,im) # define LDMDA(rn,im) CC_LDMDA(ARM_CC_AL,rn,im) -# define CC_LDMDA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im) +# define CC_LDMDA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_W,rn,im) # define LDMDA_U(rn,im) CC_LDMDA_U(ARM_CC_AL,rn,im) -# define CC_LDMDB(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_B,rn,im) +# define CC_LDMDB(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_P,rn,im) # define LDMDB(rn,im) CC_LDMDB(ARM_CC_AL,rn,im) # define T2_LDMDB(rn,im) torl(THUMB2_LDMDB,rn,im) -# define CC_LDMDB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_B|ARM_M_U,rn,im) +# define CC_LDMDB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_P|ARM_M_W,rn,im) # define LDMDB_U(rn,im) CC_LDMDB_U(ARM_CC_AL,rn,im) -# define CC_STMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_I,rn,im) +# define CC_STMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_U,rn,im) # define STMIA(rn,im) CC_STMIA(ARM_CC_AL,rn,im) # define CC_STM(cc,rn,im) CC_STMIA(cc,rn,im) # define STM(rn,im) STMIA(rn,im) -# define CC_STMIA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_I|ARM_M_U,rn,im) +# define CC_STMIA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_U|ARM_M_W,rn,im) # define STMIA_U(rn,im) CC_STMIA_U(ARM_CC_AL,rn,im) # define CC_STM_U(cc,rn,im) CC_STMIA_U(cc,rn,im) # define STM_U(rn,im) STMIA_U(rn,im) -# define CC_STMIB(cc,rn,im) corl(cc,ARM_M|ARM_M_I|ARM_M_B,rn,im) +# 
define CC_STMIB(cc,rn,im) corl(cc,ARM_M|ARM_M_U|ARM_M_P,rn,im) # define STMIB(rn,im) CC_STMIB(ARM_CC_AL,rn,im) -# define CC_STMIB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_I|ARM_M_B|ARM_M_U,rn,im) +# define CC_STMIB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_U|ARM_M_P|ARM_M_W,rn,im) # define STMIB_U(rn,im) CC_STMIB_U(ARM_CC_AL,rn,im) # define CC_STMDA(cc,rn,im) corl(cc,ARM_M,rn,im) # define STMDA(rn,im) CC_STMDA(ARM_CC_AL,rn,im) -# define CC_STMDA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_U,rn,im) +# define CC_STMDA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_W,rn,im) # define STMDA_U(rn,im) CC_STMDA_U(ARM_CC_AL,rn,im) -# define CC_STMDB(cc,rn,im) corl(cc,ARM_M|ARM_M_B,rn,im) +# define CC_STMDB(cc,rn,im) corl(cc,ARM_M|ARM_M_P,rn,im) # define STMDB(rn,im) CC_STMDB(ARM_CC_AL,rn,im) -# define CC_STMDB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_B|ARM_M_U,rn,im) +# define CC_STMDB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_P|ARM_M_W,rn,im) # define STMDB_U(rn,im) CC_STMDB_U(ARM_CC_AL,rn,im) # define CC_PUSH(cc,im) CC_STMDB_U(cc,_SP_REGNO,im) # define PUSH(im) STMDB_U(_SP_REGNO,im) @@ -1199,6 +1307,36 @@ static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define unldi_u(r0, i0, i1) _unldi_u(_jit, r0, i0, i1) static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +# define ldxbr_c(r0, r1, r2) generic_ldxbr_c(r0, r1, r2) +# define ldxbi_c(r0, r1, i0) _ldxbi_c(_jit, r0, r1, i0) +static void _ldxbi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxbr_uc(r0, r1, r2) generic_ldxbr_uc(r0, r1, r2) +# define ldxbi_uc(r0, r1, i0) _ldxbi_uc(_jit, r0, r1, i0) +static void _ldxbi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxbr_s(r0, r1, r2) generic_ldxbr_s(r0, r1, r2) +# define ldxbi_s(r0, r1, i0) _ldxbi_s(_jit, r0, r1, i0) +static void _ldxbi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxbr_us(r0, r1, r2) generic_ldxbr_us(r0, r1, r2) +# define ldxbi_us(r0, r1, i0) 
_ldxbi_us(_jit, r0, r1, i0) +static void _ldxbi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxbr_i(r0, r1, r2) generic_ldxbr_i(r0, r1, r2) +# define ldxbi_i(r0, r1, i0) _ldxbi_i(_jit, r0, r1, i0) +static void _ldxbi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_c(r0, r1, r2) generic_ldxar_c(r0, r1, r2) +# define ldxai_c(r0, r1, i0) _ldxai_c(_jit, r0, r1, i0) +static void _ldxai_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_uc(r0, r1, r2) generic_ldxar_uc(r0, r1, r2) +# define ldxai_uc(r0, r1, i0) _ldxai_uc(_jit, r0, r1, i0) +static void _ldxai_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_s(r0, r1, r2) generic_ldxar_s(r0, r1, r2) +# define ldxai_s(r0, r1, i0) _ldxai_s(_jit, r0, r1, i0) +static void _ldxai_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_us(r0, r1, r2) generic_ldxar_us(r0, r1, r2) +# define ldxai_us(r0, r1, i0) _ldxai_us(_jit, r0, r1, i0) +static void _ldxai_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_i(r0, r1, r2) generic_ldxar_i(r0, r1, r2) +# define ldxai_i(r0, r1, i0) _ldxai_i(_jit, r0, r1, i0) +static void _ldxai_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define str_c(r0,r1) _str_c(_jit,r0,r1) static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t); # define sti_c(i0,r0) _sti_c(_jit,i0,r0) @@ -1227,6 +1365,24 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); #define unsti(i0, r0, i1) _unsti(_jit, i0, r0, i1) static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define stxbr_c(r0, r1, r2) generic_stxbr_c(r0, r1, r2) +# define stxbi_c(i0, r0, r1) _stxbi_c(_jit, i0, r0, r1) +static void _stxbi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxbr_s(r0, r1, r2) generic_stxbr_s(r0, r1, r2) +# define stxbi_s(i0, r0, r1) _stxbi_s(_jit, i0, r0, r1) +static void 
_stxbi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxbr_i(r0, r1, r2) generic_stxbr_i(r0, r1, r2) +# define stxbi_i(i0, r0, r1) _stxbi_i(_jit, i0, r0, r1) +static void _stxbi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxar_c(r0, r1, r2) generic_stxar_c(r0, r1, r2) +# define stxai_c(i0, r0, r1) _stxai_c(_jit, i0, r0, r1) +static void _stxai_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxar_s(r0, r1, r2) generic_stxar_s(r0, r1, r2) +# define stxai_s(i0, r0, r1) _stxai_s(_jit, i0, r0, r1) +static void _stxai_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxar_i(r0, r1, r2) generic_stxar_i(r0, r1, r2) +# define stxai_i(i0, r0, r1) _stxai_i(_jit, i0, r0, r1) +static void _stxai_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); # define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); # define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) @@ -3826,6 +3982,214 @@ _unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) } static void +_ldxbi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRSBI_B(r0, r1, i0); + else + T2_LDRSBIN_B(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRSBI_B(r0, r1, i0); + else + LDRSBIN_B(r0, r1, -i0); + } + } + else + generic_ldxbi_c(r0, r1, i0); +} + +static void +_ldxbi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_LDRBI_B(r0, r1, i0); + else + T2_LDRBIN_B(r0, r1, -i0); + } + else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + LDRBI_B(r0, r1, i0); + else + LDRBIN_B(r0, r1, -i0); + } + else + generic_ldxbi_uc(r0, r1, i0); +} + +static void +_ldxbi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 
>= 0) + T2_LDRSHI_B(r0, r1, i0); + else + T2_LDRSHIN_B(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRSHI_B(r0, r1, i0); + else + LDRSHIN_B(r0, r1, -i0); + } + } + else + generic_ldxbi_s(r0, r1, i0); +} + +static void +_ldxbi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRHI_B(r0, r1, i0); + else + T2_LDRHIN_B(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRHI_B(r0, r1, i0); + else + LDRHIN_B(r0, r1, -i0); + } + } + else + generic_ldxbi_us(r0, r1, i0); +} + +static void +_ldxbi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_LDRI_B(r0, r1, i0); + else + T2_LDRIN_B(r0, r1, -i0); + } + else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + LDRI_B(r0, r1, i0); + else + LDRIN_B(r0, r1, -i0); + } + else + generic_ldxbi_i(r0, r1, i0); +} + +static void +_ldxai_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRSBI_A(r0, r1, i0); + else + T2_LDRSBIN_A(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRSBI_A(r0, r1, i0); + else + LDRSBIN_A(r0, r1, -i0); + } + } + else + generic_ldxai_c(r0, r1, i0); +} + +static void +_ldxai_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRBI_A(r0, r1, i0); + else + T2_LDRBIN_A(r0, r1, -i0); + } + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + LDRBI_A(r0, r1, i0); + else + LDRBIN_A(r0, r1, -i0); + } + else + generic_ldxai_uc(r0, r1, i0); +} + +static void +_ldxai_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 
0) + T2_LDRSHI_A(r0, r1, i0); + else + T2_LDRSHIN_A(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRSHI_A(r0, r1, i0); + else + LDRSHIN_A(r0, r1, -i0); + } + } + else + generic_ldxai_s(r0, r1, i0); +} + +static void +_ldxai_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRHI_A(r0, r1, i0); + else + T2_LDRHIN_A(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRHI_A(r0, r1, i0); + else + LDRHIN_A(r0, r1, -i0); + } + } + else + generic_ldxai_us(r0, r1, i0); +} + +static void +_ldxai_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_LDRI_A(r0, r1, i0); + else + T2_LDRIN_A(r0, r1, -i0); + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + LDRI_A(r0, r1, i0); + else + LDRIN_A(r0, r1, -i0); + } + else + generic_ldxai_i(r0, r1, i0); +} + +static void _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_thumb_p()) @@ -4063,6 +4427,128 @@ _unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) } static void +_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRBI_B(r1, r0, i0); + else + T2_STRBIN_B(r1, r0, -i0); + } + else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + STRBI_B(r1, r0, i0); + else + STRBIN_B(r1, r0, -i0); + } + else + generic_stxbi_c(i0, r0, r1); +} + +static void +_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_STRHI_B(r1, r0, i0); + else + T2_STRHIN_B(r1, r0, -i0); + } + else { + if (i0 >= 0) + STRHI_B(r1, r0, i0); + else + STRHIN_B(r1, r0, -i0); + } + } + else + generic_stxbi_s(i0, r0, r1); +} + +static void 
+_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRI_B(r1, r0, i0); + else + T2_STRIN_B(r1, r0, -i0); + } + else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + STRI_B(r1, r0, i0); + else + STRIN_B(r1, r0, -i0); + } + else + generic_stxbi_i(i0, r0, r1); +} + +static void +_stxai_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRBI_A(r1, r0, i0); + else + T2_STRBIN_A(r1, r0, -i0); + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + STRBI_A(r1, r0, i0); + else + STRBIN_A(r1, r0, -i0); + } + else + generic_stxai_c(i0, r0, r1); +} + +static void +_stxai_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRHI_A(r1, r0, i0); + else + T2_STRHIN_A(r1, r0, -i0); + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -255 && i0 <= 255) { /* halfword store takes an 8-bit immediate, same bound as _stxbi_s */ + if (i0 >= 0) + STRHI_A(r1, r0, i0); + else + STRHIN_A(r1, r0, -i0); + } + else + generic_stxai_s(i0, r0, r1); +} + +static void +_stxai_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRI_A(r1, r0, i0); + else + T2_STRIN_A(r1, r0, -i0); + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + STRI_A(r1, r0, i0); + else + STRIN_A(r1, r0, -i0); + } + else + generic_stxai_i(i0, r0, r1); +} + +static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_thumb_p()) { diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c index aa9bab7..cbbfd59 100644 --- a/lib/jit_arm-sz.c +++ b/lib/jit_arm-sz.c @@ -531,23 +531,23 @@ 4, /* hmulr_u */ 8, /* hmuli_u */ 8, /* 
ldxbr_c */ - 8, /* ldxbi_c */ + 4, /* ldxbi_c */ 8, /* ldxar_c */ 8, /* ldxai_c */ 8, /* ldxbr_uc */ - 8, /* ldxbi_uc */ + 4, /* ldxbi_uc */ 8, /* ldxar_uc */ 8, /* ldxai_uc */ 8, /* ldxbr_s */ - 8, /* ldxbi_s */ + 4, /* ldxbi_s */ 8, /* ldxar_s */ 8, /* ldxai_s */ 8, /* ldxbr_us */ - 8, /* ldxbi_us */ + 4, /* ldxbi_us */ 8, /* ldxar_us */ 8, /* ldxai_us */ 8, /* ldxbr_i */ - 8, /* ldxbi_i */ + 4, /* ldxbi_i */ 8, /* ldxar_i */ 8, /* ldxai_i */ 0, /* ldxbr_ui */ @@ -567,15 +567,15 @@ 20, /* ldxar_d */ 20, /* ldxai_d */ 8, /* stxbr_c */ - 8, /* stxbi_c */ + 4, /* stxbi_c */ 8, /* stxar_c */ 8, /* stxai_c */ 8, /* stxbr_s */ - 8, /* stxbi_s */ + 4, /* stxbi_s */ 8, /* stxar_s */ 8, /* stxai_s */ 8, /* stxbr_i */ - 8, /* stxbi_i */ + 4, /* stxbi_i */ 8, /* stxar_i */ 8, /* stxai_i */ 0, /* stxbr_l */ @@ -1125,23 +1125,23 @@ 4, /* hmulr_u */ 8, /* hmuli_u */ 8, /* ldxbr_c */ - 8, /* ldxbi_c */ + 4, /* ldxbi_c */ 8, /* ldxar_c */ 8, /* ldxai_c */ 8, /* ldxbr_uc */ - 8, /* ldxbi_uc */ + 4, /* ldxbi_uc */ 8, /* ldxar_uc */ 8, /* ldxai_uc */ 8, /* ldxbr_s */ - 8, /* ldxbi_s */ + 4, /* ldxbi_s */ 8, /* ldxar_s */ 8, /* ldxai_s */ 8, /* ldxbr_us */ - 8, /* ldxbi_us */ + 4, /* ldxbi_us */ 8, /* ldxar_us */ 8, /* ldxai_us */ 8, /* ldxbr_i */ - 8, /* ldxbi_i */ + 4, /* ldxbi_i */ 8, /* ldxar_i */ 8, /* ldxai_i */ 0, /* ldxbr_ui */ @@ -1161,15 +1161,15 @@ 8, /* ldxar_d */ 8, /* ldxai_d */ 8, /* stxbr_c */ - 8, /* stxbi_c */ + 4, /* stxbi_c */ 8, /* stxar_c */ 8, /* stxai_c */ 8, /* stxbr_s */ - 8, /* stxbi_s */ + 4, /* stxbi_s */ 8, /* stxar_s */ 8, /* stxai_s */ 8, /* stxbr_i */ - 8, /* stxbi_i */ + 4, /* stxbi_i */ 8, /* stxar_i */ 8, /* stxai_i */ 0, /* stxbr_l */ diff --git a/lib/jit_arm-vfp.c b/lib/jit_arm-vfp.c index f890a18..4741091 100644 --- a/lib/jit_arm-vfp.c +++ b/lib/jit_arm-vfp.c @@ -292,29 +292,29 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int); # define VCVTR_S32_F64(r0,r1) CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1) # define CC_VCVTR_U32_F64(cc,r0,r1) 
cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1) # define VCVTR_U32_F64(r0,r1) CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1) -# define CC_VLDMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0) +# define CC_VLDMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_U,r0,r1,i0) # define VLDMIA_F32(r0,r1,i0) CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0) +# define CC_VLDMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_U|ARM_V_F64,r0,r1,i0) # define VLDMIA_F64(r0,r1,i0) CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0) +# define CC_VSTMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_U,r0,r1,i0) # define VSTMIA_F32(r0,r1,i0) CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0) +# define CC_VSTMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_U|ARM_V_F64,r0,r1,i0) # define VSTMIA_F64(r0,r1,i0) CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0) +# define CC_VLDMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_U|ARM_M_W,r0,r1,i0) # define VLDMIA_U_F32(r0,r1,i0) CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0) +# define CC_VLDMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_U|ARM_M_W|ARM_V_F64,r0,r1,i0) # define VLDMIA_U_F64(r0,r1,i0) CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0) +# define CC_VSTMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_U|ARM_M_W,r0,r1,i0) # define VSTMIA_U_F32(r0,r1,i0) CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0) +# define CC_VSTMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_U|ARM_M_W|ARM_V_F64,r0,r1,i0) # define 
VSTMIA_U_F64(r0,r1,i0) CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0) +# define CC_VLDMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_P|ARM_M_W,r0,r1,i0) # define VLDMDB_U_F32(r0,r1,i0) CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0) +# define CC_VLDMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_P|ARM_M_W|ARM_V_F64,r0,r1,i0) # define VLDMDB_U_F64(r0,r1,i0) CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0) +# define CC_VSTMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_P|ARM_M_W,r0,r1,i0) # define VSTMDB_U_F32(r0,r1,i0) CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0) +# define CC_VSTMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_P|ARM_M_W|ARM_V_F64,r0,r1,i0) # define VSTMDB_U_F64(r0,r1,i0) CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0) # define CC_VPUSH_F32(cc,r0,i0) CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0) # define VPUSH_F32(r0,i0) CC_VPUSH_F32(ARM_CC_AL,r0,i0) diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 27693d2..bd44488 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1354,23 +1354,21 @@ _emit_code(jit_state_t *_jit) break #define case_rrx(name, type) \ case jit_code_##name##i##type: \ - generic_##name##i##type(rn(node->u.w), \ - rn(node->v.w), node->w.w); \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ break #define case_rrX(name, type) \ case jit_code_##name##r##type: \ - generic_##name##r##type(rn(node->u.w), \ - rn(node->v.w), rn(node->w.w)); \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ break #define case_xrr(name, type) \ case jit_code_##name##i##type: \ - generic_##name##i##type(node->u.w, rn(node->v.w), \ - rn(node->w.w)); \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ break 
#define case_Xrr(name, type) \ case jit_code_##name##r##type: \ - generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ - rn(node->w.w)); \ + name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ |