author | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023年04月05日 12:14:29 -0300 |
---|---|---|
committer | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023年04月05日 12:14:29 -0300 |
commit | 0f3bc3da4760347c67deae0be2f7a1f6bb13ea83 (patch) | |
tree | 07a35194b1310b476bc4b15058f8fa58b51644a0 | |
parent | b5f6ef6c473656a954b774de6ffb8172c48e6254 (diff) | |
download | lightning-0f3bc3da4760347c67deae0be2f7a1f6bb13ea83.tar.gz |
-rw-r--r-- | include/lightning/jit_arm.h | 57 | ||||
-rw-r--r-- | lib/jit_arm-cpu.c | 73 | ||||
-rw-r--r-- | lib/jit_arm-swf.c | 336 | ||||
-rw-r--r-- | lib/jit_arm-vfp.c | 325 | ||||
-rw-r--r-- | lib/jit_arm.c | 74 |
Review notes (free text ahead of the first "diff --git" header; patch(1) skips leading text):
  * Line structure of the diff is restored; indentation is reconstructed with spaces because the
    original whitespace was lost, so apply with whitespace-insensitive matching if needed.
  * Every correction below replaces an added line in place, so all hunk line counts are unchanged.
  * _swf_unldi_x / _vfp_unldi_x: the aligned branch tested the address (i0) instead of the
    size (i1) when choosing between the float and double load.
  * _swf_unldi_x / _vfp_unldi_x / _swf_unsti_x / _vfp_unsti_x: the two 32-bit halves were passed
    in the opposite order from the register forms (*_unldr_x / *_unstr_x); they now match.
  * _unldi / _unldi_u: unused locals replaced by a comment line.
  * jit_arm-vfp.c: prototype for the never-defined _vfp_movr_d_ww replaced by a comment line.
  * "#define" lines inside "#if PROTO" use the file's "#  define" form.

diff --git a/include/lightning/jit_arm.h b/include/lightning/jit_arm.h
index 0ed9535..558f553 100644
--- a/include/lightning/jit_arm.h
+++ b/include/lightning/jit_arm.h
@@ -126,6 +126,63 @@ typedef struct {
      * of that function if generating jit for a relative jump.
      */
     jit_uint32_t exchange       : 1;
+    /* By default assume cannot load unaligned data.
+     * A3.2.1
+     * Unaligned data access
+     * An ARMv7 implementation must support unaligned data accesses by
+     * some load and store instructions, as Table A3-1 shows. Software
+     * can set the SCTLR.A bit to control whether a misaligned access by
+     * one of these instructions causes an Alignment fault Data Abort
+     * exception.
+     * Table A3-1 Alignment requirements of load/store instructions
+     *                                    Result if check fails when
+     * Instructions     Alignment check   SCTLR.A is 0      SCTLR.A is 1
+     * LDRB, LDREXB,
+     * LDRBT, LDRSB,
+     * LDRSBT, STRB,
+     * STREXB, STRBT,
+     * SWPB, TBB        None              -                 -
+     * LDRH, LDRHT,
+     * LDRSH, LDRSHT,
+     * STRH, STRHT,
+     * TBH              Halfword          Unaligned access  Alignment fault
+     * LDREXH, STREXH   Halfword          Alignment fault   Alignment fault
+     * LDR, LDRT,
+     * STR, STRT        Word              Unaligned access  Alignment fault
+     * LDREX, STREX     Word              Alignment fault   Alignment fault
+     * LDREXD, STREXD   Doubleword        Alignment fault   Alignment fault
+     * All forms of
+     * LDM and STM,
+     * LDRD, RFE, SRS,
+     * STRD, SWP        Word              Alignment fault   Alignment fault
+     * LDC, LDC2,
+     * STC, STC2        Word              Alignment fault   Alignment fault
+     * VLDM, VLDR,
+     * VPOP, VPUSH,
+     * VSTM, VSTR       Word              Alignment fault   Alignment fault
+     * VLD1, VLD2,
+     * VLD3, VLD4,
+     * VST1, VST2,
+     * VST3, VST4,
+     * all with
+     * standard
+     * alignment (a)    Element size      Unaligned access  Alignment fault
+     * VLD1, VLD2,
+     * VLD3, VLD4,
+     * VST1, VST2,
+     * VST3, VST4,
+     * all with
+     * @<align>
+     * specified (a)    As specified by   Alignment fault   Alignment fault
+     *                  @<align>
+     *
+     * (a) These element and structure load/store instructions are only in
+     * the Advanced SIMD Extension to the ARMv7 ARM and Thumb instruction
+     * sets. ARMv7 does not support the pre-ARMv6 alignment model, so
+     * software cannot use that model with these instructions.
+     */
+    jit_uint32_t unaligned      : 1;
+    jit_uint32_t vfp_unaligned  : 1;
 } jit_cpu_t;
 
 /*
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index d2f1727..080275c 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -18,9 +18,14 @@
  */
 
 #if PROTO
+#  define jit_unaligned_p()             (jit_cpu.unaligned)
+#  define jit_vfp_unaligned_p()         (jit_cpu.vfp_unaligned)
 #  define stxi(i0,r0,r1)                stxi_i(i0,r0,r1)
 #  define ldxi(r0,r1,i0)                ldxi_i(r0,r1,i0)
 #  define ldr(r0,r1)                    ldr_i(r0,r1)
+#  define ldi(r0,i0)                    ldi_i(r0,i0)
+#  define str(r0,r1)                    str_i(r0,r1)
+#  define sti(i0,r0)                    sti_i(i0,r0)
 #  define _s20P(d)                      ((d) >= -(int)0x80000 && d <= 0x7ffff)
 #  define _s24P(d)                      ((d) >= -(int)0x800000 && d <= 0x7fffff)
 #  define _u3(v)                        ((v) & 0x7)
@@ -1174,6 +1179,14 @@ static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
 static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_i(r0,r1,i0)              _ldxi_i(_jit,r0,r1,i0)
 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define unldr(r0, r1, i0)             _unldr(_jit, r0, r1, i0)
+static void _unldr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define unldi(r0, i0, i1)             _unldi(_jit, r0, i0, i1)
+static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+#  define unldr_u(r0, r1, i0)           _unldr_u(_jit, r0, r1, i0)
+static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define unldi_u(r0, i0, i1)           _unldi_u(_jit, r0, i0, i1)
+static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
 #  define str_c(r0,r1)                  _str_c(_jit,r0,r1)
 static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define sti_c(i0,r0)                  _sti_c(_jit,i0,r0)
@@ -1198,6 +1211,10 @@ static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
 static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define stxi_i(r0,r1,i0)              _stxi_i(_jit,r0,r1,i0)
 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define unstr(r0, r1, i0)             _unstr(_jit, r0, r1, i0)
+static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define unsti(i0, r0, i1)             _unsti(_jit, i0, r0, i1)
+static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
 #  define bswapr_us(r0,r1)              _bswapr_us(_jit,r0,r1)
 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define bswapr_ui(r0,r1)              _bswapr_ui(_jit,r0,r1)
@@ -3736,6 +3753,44 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 }
 
 static void
+_unldr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_unaligned_p())
+        fallback_unldr(r0, r1, i0);
+    else
+        generic_unldr(r0, r1, i0);
+}
+
+static void
+_unldi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    /* Only dispatches on jit_unaligned_p(); no scratch register is used here. */
+    if (jit_unaligned_p())
+        fallback_unldi(r0, i0, i1);
+    else
+        generic_unldi(r0, i0, i1);
+}
+
+static void
+_unldr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_unaligned_p())
+        fallback_unldr_u(r0, r1, i0);
+    else
+        generic_unldr_u(r0, r1, i0);
+}
+
+static void
+_unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    /* Only dispatches on jit_unaligned_p(); no scratch register is used here. */
+    if (jit_unaligned_p())
+        fallback_unldi_u(r0, i0, i1);
+    else
+        generic_unldi_u(r0, i0, i1);
+}
+
+static void
 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     if (jit_thumb_p())
@@ -3955,6 +4010,24 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 
 static void
+_unstr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_unaligned_p())
+        fallback_unstr(r0, r1, i0);
+    else
+        generic_unstr(r0, r1, i0);
+}
+
+static void
+_unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (jit_unaligned_p())
+        fallback_unsti(i0, r0, i1);
+    else
+        generic_unsti(i0, r0, i1);
+}
+
+static void
 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     if (jit_thumb_p()) {
diff --git a/lib/jit_arm-swf.c b/lib/jit_arm-swf.c
index 2aa6a12..0b59b4e 100644
--- a/lib/jit_arm-swf.c
+++ b/lib/jit_arm-swf.c
@@ -128,12 +128,20 @@ static void _swf_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _swf_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define swf_movr_f(r0,r1)             _swf_movr_f(_jit,r0,r1)
 static void _swf_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define swf_movr_d(r0,r1)             _swf_movr_d(_jit,r0,r1)
-static void _swf_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define swf_movi_f(r0,i0)             _swf_movi_f(_jit,r0,i0)
 static void _swf_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
+#  define swf_movr_w_f(r0, r1)          _swf_movr_w_f(_jit, r0, r1)
+static void _swf_movr_w_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_movr_f_w(r0, r1)          _swf_movr_f_w(_jit, r0, r1)
+static void _swf_movr_f_w(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_movr_d(r0,r1)             _swf_movr_d(_jit,r0,r1)
+static void _swf_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define swf_movi_d(r0,i0)             _swf_movi_d(_jit,r0,i0)
 static void _swf_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
+#  define swf_movr_ww_d(r0, r1, r2)     _swf_movr_ww_d(_jit, r0, r1, r2)
+static void _swf_movr_ww_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_movr_d_ww(r0, r1, r2)     _swf_movr_d_ww(_jit, r0, r1, r2)
+static void _swf_movr_d_ww(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define swf_absr_f(r0,r1)             _swf_absr_f(_jit,r0,r1)
 static void _swf_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define swf_absr_d(r0,r1)             _swf_absr_d(_jit,r0,r1)
@@ -306,12 +314,20 @@ static void _swf_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 static void _swf_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define swf_ldxi_d(r0,r1,i0)          _swf_ldxi_d(_jit,r0,r1,i0)
 static void _swf_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define swf_unldr_x(r0, r1, i0)       _swf_unldr_x(_jit, r0, r1, i0)
+static void _swf_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define swf_unldi_x(r0, i0, i1)       _swf_unldi_x(_jit, r0, i0, i1)
+static void _swf_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
 #  define swf_str_f(r0,r1)              _swf_str_f(_jit,r0,r1)
 static void _swf_str_f(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define swf_str_d(r0,r1)              _swf_str_d(_jit,r0,r1)
 static void _swf_str_d(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define swf_sti_f(r0,i0)              _swf_sti_f(_jit,r0,i0)
 static void _swf_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define swf_unstr_x(r0, r1, i0)       _swf_unstr_x(_jit, r0, r1, i0)
+static void _swf_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define swf_unsti_x(i0, r0, i1)       _swf_unsti_x(_jit, i0, r0, i1)
+static void _swf_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
 #  define swf_sti_d(r0,i0)              _swf_sti_d(_jit,r0,i0)
 static void _swf_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
 #  define swf_stxr_f(r0,r1,r2)          _swf_stxr_f(_jit,r0,r1,r2)
@@ -1882,70 +1898,12 @@ _swf_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t reg;
     if (r0 != r1) {
-        if (jit_fpr_p(r0) || jit_fpr_p(r1))
-            CHECK_SWF_OFFSET();
-        if (jit_fpr_p(r1)) {
-            reg = jit_get_reg(jit_class_gpr);
-            swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
-            if (jit_fpr_p(r0))
-                swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
-            else
-                movr(r0, rn(reg));
-            jit_unget_reg(reg);
-        }
-        else if (jit_fpr_p(r0))
-            swf_strin(r1, _FP_REGNO, swf_off(r0) + 8);
-        else
-            movr(r0, r1);
-    }
-}
-
-static void
-_swf_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t reg;
-    if (r0 != r1) {
-        if (jit_fpr_p(r0) || jit_fpr_p(r1))
-            CHECK_SWF_OFFSET();
-        if (jit_fpr_p(r1)) {
-            if (!jit_thumb_p() && jit_armv5e_p() &&
-                (reg = jit_get_reg_pair()) != JIT_NOREG) {
-                LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
-                if (jit_fpr_p(r0))
-                    STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
-                else {
-                    movr(r0, rn(reg));
-                    movr(r0 + 1, rn(reg) + 1);
-                }
-                jit_unget_reg_pair(reg);
-            }
-            else {
-                reg = jit_get_reg(jit_class_gpr);
-                swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
-                if (jit_fpr_p(r0))
-                    swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
-                else
-                    movr(r0, rn(reg));
-                swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 4);
-                if (jit_fpr_p(r0))
-                    swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
-                else
-                    movr(r0 + 1, rn(reg));
-                jit_unget_reg(reg);
-            }
-        }
-        else if (jit_fpr_p(r0)) {
-            if (!jit_thumb_p() && jit_armv5e_p() && !(r1 & 1))
-                STRDIN(r1, _FP_REGNO, swf_off(r0) + 8);
-            else {
-                swf_strin(r1, _FP_REGNO, swf_off(r0) + 8);
-                swf_strin(r1 + 1, _FP_REGNO, swf_off(r0) + 4);
-            }
-        }
-        else {
-            movr(r0, r1);
-            movr(r0 + 1, r1 + 1);
-        }
+        assert(jit_fpr_p(r0) && jit_fpr_p(r1));
+        CHECK_SWF_OFFSET();
+        reg = jit_get_reg(jit_class_gpr);
+        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+        jit_unget_reg(reg);
     }
 }
 
@@ -1957,54 +1915,104 @@ _swf_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
         jit_float32_t f;
     } data;
     jit_int32_t reg;
-    if (jit_fpr_p(r0))
-        CHECK_SWF_OFFSET();
+    assert(jit_fpr_p(r0));
+    CHECK_SWF_OFFSET();
     data.f = i0;
-    if (jit_fpr_p(r0)) {
-        reg = jit_get_reg(jit_class_gpr);
-        movi(rn(reg), data.i);
-        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
-        jit_unget_reg(reg);
-    }
-    else
-        movi(r0, data.i);
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), data.i);
+    swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+    jit_unget_reg(reg);
 }
 
 static void
-_swf_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+_swf_movr_w_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(jit_fpr_p(r0));
+    CHECK_SWF_OFFSET();
+    swf_strin(r1, _FP_REGNO, swf_off(r0) + 8);
+}
+
+static void
+_swf_movr_f_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(jit_fpr_p(r1));
+    CHECK_SWF_OFFSET();
+    swf_ldrin(r0, _FP_REGNO, swf_off(r1) + 8);
+}
+
+static void
+_swf_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t reg;
-    union {
-        jit_int32_t i[2];
-        jit_float64_t d;
-    } data;
-    if (jit_fpr_p(r0))
+    if (r0 != r1) {
+        assert(jit_fpr_p(r0) && jit_fpr_p(r1));
         CHECK_SWF_OFFSET();
-    data.d = i0;
-    if (jit_fpr_p(r0)) {
         if (!jit_thumb_p() && jit_armv5e_p() &&
             (reg = jit_get_reg_pair()) != JIT_NOREG) {
-            movi(rn(reg), data.i[0]);
-            movi(rn(reg) + 1, data.i[1]);
+            LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
             STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
             jit_unget_reg_pair(reg);
         }
         else {
             reg = jit_get_reg(jit_class_gpr);
-            movi(rn(reg), data.i[0]);
+            swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
             swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
-            movi(rn(reg), data.i[1]);
+            swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 4);
             swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
             jit_unget_reg(reg);
         }
     }
+}
+
+static void
+_swf_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    jit_int32_t reg;
+    union {
+        jit_int32_t i[2];
+        jit_float64_t d;
+    } data;
+    assert(jit_fpr_p(r0));
+    CHECK_SWF_OFFSET();
+    data.d = i0;
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+        (reg = jit_get_reg_pair()) != JIT_NOREG) {
+        movi(rn(reg), data.i[0]);
+        movi(rn(reg) + 1, data.i[1]);
+        STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+        jit_unget_reg_pair(reg);
+    }
     else {
-        movi(r0, data.i[0]);
-        movi(r0 + 1, data.i[1]);
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), data.i[0]);
+        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+        movi(rn(reg), data.i[1]);
+        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+        jit_unget_reg(reg);
     }
 }
 
 static void
+_swf_movr_ww_d(jit_state_t *_jit,
+               jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(jit_fpr_p(r0));
+    CHECK_SWF_OFFSET();
+    swf_strin(r1, _FP_REGNO, swf_off(r0) + 8);
+    swf_strin(r2, _FP_REGNO, swf_off(r0) + 4);
+}
+
+static void
+_swf_movr_d_ww(jit_state_t *_jit,
+               jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(jit_fpr_p(r2));
+    CHECK_SWF_OFFSET();
+    swf_ldrin(r0, _FP_REGNO, swf_off(r2) + 8);
+    swf_ldrin(r1, _FP_REGNO, swf_off(r2) + 4);
+}
+
+static void
 _swf_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t reg;
@@ -2402,6 +2410,80 @@ _swf_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 }
 
 static void
+_swf_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t t0, r2;
+    jit_int32_t t1, r3;
+    assert(i0 == 4 || i0 == 8);
+    if (jit_unaligned_p()) {
+        t0 = fallback_jit_get_reg(jit_class_gpr);
+        r2 = rn(t0);
+        if (i0 == 4) {
+            unldr(r2, r1, 4);
+            swf_movr_w_f(r0, r2);
+        }
+        else {
+            t1 = fallback_jit_get_reg(jit_class_gpr);
+            r3 = rn(t1);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+            unldr(r2, r1, 4);
+            addi(r3, r1, 4);
+            unldr(r3, r3, 4);
+#else
+            unldr(r3, r1, 4);
+            addi(r2, r1, 4);
+            unldr(r2, r2, 4);
+#endif
+            swf_movr_ww_d(r0, r2, r3);
+            jit_unget_reg(t1);
+        }
+        jit_unget_reg(t0);
+    }
+    else {
+        if (i0 == 4)
+            swf_ldr_f(r0, r1);
+        else
+            swf_ldr_d(r0, r1);
+    }
+}
+
+static void
+_swf_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    jit_int32_t t0, r2;
+    jit_int32_t t1, r3;
+    assert(i1 == 4 || i1 == 8);
+    if (jit_unaligned_p()) {
+        t0 = fallback_jit_get_reg(jit_class_gpr);
+        r2 = rn(t0);
+        if (i1 == 4) {
+            unldi(r2, i0, 4);
+            swf_movr_w_f(r0, r2);
+        }
+        else {
+            t1 = fallback_jit_get_reg(jit_class_gpr);
+            r3 = rn(t1);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+            unldi(r2, i0, 4);
+            unldi(r3, i0 + 4, 4);
+#else
+            unldi(r3, i0, 4);
+            unldi(r2, i0 + 4, 4);
+#endif
+            swf_movr_ww_d(r0, r2, r3);
+            jit_unget_reg(t1);
+        }
+        jit_unget_reg(t0);
+    }
+    else {
+        if (i1 == 4)
+            swf_ldi_f(r0, i0);
+        else
+            swf_ldi_d(r0, i0);
+    }
+}
+
+static void
 _swf_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t rg0, rg1;
@@ -2533,6 +2615,80 @@ _swf_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 }
 
 static void
+_swf_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t t0, r2;
+    jit_int32_t t1, r3;
+    assert(i0 == 4 || i0 == 8);
+    if (jit_unaligned_p()) {
+        t0 = fallback_jit_get_reg(jit_class_gpr);
+        r2 = rn(t0);
+        if (i0 == 4) {
+            swf_movr_f_w(r2, r1);
+            unstr(r0, r2, 4);
+        }
+        else {
+            t1 = fallback_jit_get_reg(jit_class_gpr);
+            r3 = rn(t1);
+            swf_movr_d_ww(r2, r3, r1);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+            unstr(r0, r2, 4);
+            addi(r2, r0, 4);
+            unstr(r2, r3, 4);
+#else
+            unstr(r0, r3, 4);
+            addi(r3, r0, 4);
+            unstr(r3, r2, 4);
+#endif
+            jit_unget_reg(t1);
+        }
+        jit_unget_reg(t0);
+    }
+    else {
+        if (i0 == 4)
+            swf_str_f(r0, r1);
+        else
+            swf_str_d(r0, r1);
+    }
+}
+
+static void
+_swf_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t t0, r2;
+    jit_int32_t t1, r3;
+    assert(i1 == 4 || i1 == 8);
+    if (jit_unaligned_p()) {
+        t0 = fallback_jit_get_reg(jit_class_gpr);
+        r2 = rn(t0);
+        if (i1 == 4) {
+            swf_movr_f_w(r2, r0);
+            unsti(i0, r2, 4);
+        }
+        else {
+            t1 = fallback_jit_get_reg(jit_class_gpr);
+            r3 = rn(t1);
+            swf_movr_d_ww(r2, r3, r0);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+            unsti(i0, r2, 4);
+            unsti(i0 + 4, r3, 4);
+#else
+            unsti(i0, r3, 4);
+            unsti(i0 + 4, r2, 4);
+#endif
+            jit_unget_reg(t1);
+        }
+        jit_unget_reg(t0);
+    }
+    else {
+        if (i1 == 4)
+            swf_sti_f(i0, r0);
+        else
+            swf_sti_d(i0, r0);
+    }
+}
+
+static void
 _swf_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t rg0, rg1;
diff --git a/lib/jit_arm-vfp.c b/lib/jit_arm-vfp.c
index b5bb27a..ade905e 100644
--- a/lib/jit_arm-vfp.c
+++ b/lib/jit_arm-vfp.c
@@ -473,12 +473,17 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
 static void _vfp_popcntr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define vfp_movr_f(r0,r1)             _vfp_movr_f(_jit,r0,r1)
 static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define vfp_movr_d(r0,r1)             _vfp_movr_d(_jit,r0,r1)
-static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define vfp_movi_f(r0,i0)             _vfp_movi_f(_jit,r0,i0)
 static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
+#  define vfp_movr_w_f(r0, r1)          VMOV_S_A(r0, r1)
+#  define vfp_movr_f_w(r0, r1)          VMOV_A_S(r0, r1)
+#  define vfp_movr_d(r0,r1)             _vfp_movr_d(_jit,r0,r1)
+static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define vfp_movi_d(r0,i0)             _vfp_movi_d(_jit,r0,i0)
 static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
+#  define vfp_movr_ww_d(r0, r1, r2)     VMOV_D_AA(r0, r1, r2)
+#  define vfp_movr_d_ww(r0, r1, r2)     VMOV_AA_D(r0, r1, r2)
+/* vfp_movr_ww_d and vfp_movr_d_ww expand to VMOV_* directly; no helper function. */
 #  define vfp_extr_f(r0,r1)             _vfp_extr_f(_jit,r0,r1)
 static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define vfp_extr_d(r0,r1)             _vfp_extr_d(_jit,r0,r1)
@@ -795,6 +800,10 @@ static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define vfp_ldxi_d(r0,r1,i0)          _vfp_ldxi_d(_jit,r0,r1,i0)
 static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define vfp_unldr_x(r0, r1, i0)       _vfp_unldr_x(_jit, r0, r1, i0)
+static void _vfp_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define vfp_unldi_x(r0, i0, i1)       _vfp_unldi_x(_jit, r0, i0, i1)
+static void _vfp_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
 #  define vfp_str_f(r0,r1)              VSTR_F32(r1,r0,0)
 #  define vfp_str_d(r0,r1)              VSTR_F64(r1,r0,0)
 #  define vfp_sti_f(i0,r0)              _vfp_sti_f(_jit,i0,r0)
@@ -809,6 +818,10 @@ static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define vfp_stxi_d(i0,r0,r1)          _vfp_stxi_d(_jit,i0,r0,r1)
 static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_unstr_x(r0, r1, i0)       _vfp_unstr_x(_jit, r0, r1, i0)
+static void _vfp_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define vfp_unsti_x(i0, r0, i1)       _vfp_unsti_x(_jit, i0, r0, i1)
+static void _vfp_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
 #  define vfp_vaarg_d(r0, r1)           _vfp_vaarg_d(_jit, r0, r1)
 static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #endif
@@ -1239,39 +1252,9 @@ _vfp_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 != r1) {
-        if (jit_fpr_p(r1)) {
-            if (jit_fpr_p(r0))
-                VMOV_F32(r0, r1);
-            else
-                VMOV_A_S(r0, r1);
-        }
-        else if (jit_fpr_p(r0))
-            VMOV_S_A(r0, r1);
-        else
-            movr(r0, r1);
-    }
-}
-
-static void
-_vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    if (r0 != r1) {
-        if (jit_fpr_p(r1)) {
-            if (jit_fpr_p(r0))
-                VMOV_F64(r0, r1);
-            else
-                VMOV_AA_D(r0, r0 + 1, r1);
-        }
-        else if (jit_fpr_p(r0))
-            VMOV_D_AA(r0, r1, r1 + 1);
-        else {
-            /* minor consistency check */
-            assert(r0 + 1 != r1 && r0 -1 != r1);
-            movr(r0, r1);
-            movr(r0 + 1, r1 + 1);
-        }
-    }
+    assert(jit_fpr_p(r0) && jit_fpr_p(r1));
+    if (r0 != r1)
+        VMOV_F32(r0, r1);
 }
 
 static void
@@ -1284,22 +1267,27 @@ _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
     jit_int32_t reg;
     jit_int32_t code;
     u.f = i0;
-    if (jit_fpr_p(r0)) {
-        /* float arguments are packed, for others,
-         * lightning only address even registers */
-        if (!(r0 & 1) && (r0 - 32) >= 0 &&
-            ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
-             (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
-            VIMM(code, r0);
-        else {
-            reg = jit_get_reg(jit_class_gpr);
-            movi(rn(reg), u.i);
-            VMOV_S_A(r0, rn(reg));
-            jit_unget_reg(reg);
-        }
+    assert(jit_fpr_p(r0));
+    /* float arguments are packed, for others,
+     * lightning only address even registers */
+    if (!(r0 & 1) && (r0 - 32) >= 0 &&
+        ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
+         (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
+        VIMM(code, r0);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), u.i);
+        VMOV_S_A(r0, rn(reg));
+        jit_unget_reg(reg);
     }
-    else
-        movi(r0, u.i);
+}
+
+static void
+_vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(jit_fpr_p(r0) && jit_fpr_p(r1));
+    if (r0 != r1)
+        VMOV_F64(r0, r1);
 }
 
 static void
@@ -1312,23 +1300,23 @@ _vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
     jit_int32_t code;
     jit_int32_t rg0, rg1;
     u.d = i0;
-    if (jit_fpr_p(r0)) {
-        if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
-            (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
-            VIMM(code, r0);
-        else {
-            rg0 = jit_get_reg(jit_class_gpr);
-            rg1 = jit_get_reg(jit_class_gpr);
-            movi(rn(rg0), u.i[0]);
-            movi(rn(rg1), u.i[1]);
-            VMOV_D_AA(r0, rn(rg0), rn(rg1));
-            jit_unget_reg(rg1);
-            jit_unget_reg(rg0);
-        }
-    }
+#  if __BYTE_ORDER == __BIG_ENDIAN
+    code = u.i[0];
+    u.i[0] = u.i[1];
+    u.i[1] = code;
+#  endif
+    assert(jit_fpr_p(r0));
+    if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
+        (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
+        VIMM(code, r0);
     else {
-        movi(r0, u.i[0]);
-        movi(r0 + 1, u.i[1]);
+        rg0 = jit_get_reg(jit_class_gpr);
+        rg1 = jit_get_reg(jit_class_gpr);
+        movi(rn(rg0), u.i[0]);
+        movi(rn(rg1), u.i[1]);
+        VMOV_D_AA(r0, rn(rg0), rn(rg1));
+        jit_unget_reg(rg1);
+        jit_unget_reg(rg0);
     }
 }
 
@@ -2208,6 +2196,107 @@ _vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 }
 
 static void
+_vfp_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t t0, r2;
+    jit_int32_t t1, r3;
+    assert(i0 == 4 || i0 == 8);
+    if (jit_vfp_unaligned_p()) {
+        t0 = jit_get_reg(jit_class_gpr);
+        r2 = rn(t0);
+        if (i0 == 4) {
+            if (jit_unaligned_p())
+                unldr(r2, r1, 4);
+            else
+                ldr(r2, r1);
+            vfp_movr_w_f(r0, r2);
+        }
+        else {
+            t1 = jit_get_reg(jit_class_gpr);
+            r3 = rn(t1);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+            if (jit_unaligned_p()) {
+                unldr(r2, r1, 4);
+                addi(r3, r1, 4);
+                unldr(r3, r3, 4);
+            }
+            else {
+                ldr(r2, r1);
+                ldxi(r3, r1, 4);
+            }
+#else
+            if (jit_unaligned_p()) {
+                unldr(r3, r1, 4);
+                addi(r2, r1, 4);
+                unldr(r2, r2, 4);
+            }
+            else {
+                ldr(r3, r1);
+                ldxi(r2, r1, 4);
+            }
+#endif
+            vfp_movr_ww_d(r0, r2, r3);
+            jit_unget_reg(t1);
+        }
+        jit_unget_reg(t0);
+    }
+    else {
+        if (i0 == 4)
+            vfp_ldr_f(r0, r1);
+        else
+            vfp_ldr_d(r0, r1);
+    }
+}
+
+static void
+_vfp_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    jit_int32_t t0, r2;
+    jit_int32_t t1, r3;
+    assert(i1 == 4 || i1 == 8);
+    if (jit_vfp_unaligned_p()) {
+        t0 = jit_get_reg(jit_class_gpr);
+        r2 = rn(t0);
+        if (i1 == 4) {
+            unldi(r2, i0, 4);
+            vfp_movr_w_f(r0, r2);
+        }
+        else {
+            t1 = jit_get_reg(jit_class_gpr);
+            r3 = rn(t1);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+            if (jit_unaligned_p()) {
+                unldi(r2, i0, 4);
+                unldi(r3, i0 + 4, 4);
+            }
+            else {
+                ldi(r2, i0);
+                ldi(r3, i0 + 4);
+            }
+#else
+            if (jit_unaligned_p()) {
+                unldi(r3, i0, 4);
+                unldi(r2, i0 + 4, 4);
+            }
+            else {
+                ldi(r3, i0);
+                ldi(r2, i0 + 4);
+            }
+#endif
+            vfp_movr_ww_d(r0, r2, r3);
+            jit_unget_reg(t1);
+        }
+        jit_unget_reg(t0);
+    }
+    else {
+        if (i1 == 4)
+            vfp_ldi_f(r0, i0);
+        else
+            vfp_ldi_d(r0, i0);
+    }
+}
+
+static void
 _vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t reg;
@@ -2337,6 +2426,110 @@ _vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 
 static void
+_vfp_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t t0, r2;
+    jit_int32_t t1, r3;
+    assert(i0 == 4 || i0 == 8);
+    if (jit_vfp_unaligned_p()) {
+        t0 = jit_get_reg(jit_class_gpr);
+        r2 = rn(t0);
+        if (i0 == 4) {
+            vfp_movr_f_w(r2, r1);
+            if (jit_unaligned_p())
+                unstr(r0, r2, 4);
+            else
+                str(r0, r2);
+        }
+        else {
+            t1 = jit_get_reg(jit_class_gpr);
+            r3 = rn(t1);
+            vfp_movr_d_ww(r2, r3, r1);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+            if (jit_unaligned_p()) {
+                unstr(r0, r2, 4);
+                addi(r2, r0, 4);
+                unstr(r2, r3, 4);
+            }
+            else {
+                str(r0, r2);
+                stxi(4, r0, r3);
+            }
+#else
+            if (jit_unaligned_p()) {
+                unstr(r0, r3, 4);
+                addi(r3, r0, 4);
+                unstr(r3, r2, 4);
+            }
+            else {
+                str(r0, r3);
+                stxi(4, r0, r2);
+            }
+#endif
+            jit_unget_reg(t1);
+        }
+        jit_unget_reg(t0);
+    }
+    else {
+        if (i0 == 4)
+            vfp_str_f(r0, r1);
+        else
+            vfp_str_d(r0, r1);
+    }
+}
+
+static void
+_vfp_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t t0, r2;
+    jit_int32_t t1, r3;
+    assert(i1 == 4 || i1 == 8);
+    if (jit_vfp_unaligned_p()) {
+        t0 = jit_get_reg(jit_class_gpr);
+        r2 = rn(t0);
+        if (i1 == 4) {
+            vfp_movr_f_w(r2, r0);
+            if (jit_unaligned_p())
+                unsti(i0, r2, 4);
+            else
+                sti(i0, r2);
+        }
+        else {
+            t1 = jit_get_reg(jit_class_gpr);
+            r3 = rn(t1);
+            vfp_movr_d_ww(r2, r3, r0);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+            if (jit_unaligned_p()) {
+                unsti(i0, r2, 4);
+                unsti(i0 + 4, r3, 4);
+            }
+            else {
+                sti(i0, r2);
+                sti(i0 + 4, r3);
+            }
+#else
+            if (jit_unaligned_p()) {
+                unsti(i0, r3, 4);
+                unsti(i0 + 4, r2, 4);
+            }
+            else {
+                sti(i0, r3);
+                sti(i0 + 4, r2);
+            }
+#endif
+            jit_unget_reg(t1);
+        }
+        jit_unget_reg(t0);
+    }
+    else {
+        if (i1 == 4)
+            vfp_sti_f(i0, r0);
+        else
+            vfp_sti_d(i0, r0);
+    }
+}
+
+static void
 _vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t reg;
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index a7cb42b..9507dd2 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -240,6 +240,11 @@ jit_get_cpu(void)
     /* do not generate hardware integer division by default */
     if (jit_cpu.version == 7)
         jit_cpu.extend = 0;
+
+    /* By default generate extra instructions for unaligned load/store. */
+    jit_cpu.unaligned = 1;
+    /* On Linux, vfp load/store should be the only unaligned accesses not handled */
+    jit_cpu.vfp_unaligned = 1;
 }
 
 void
@@ -1636,6 +1641,18 @@ _emit_code(jit_state_t *_jit)
                 case_rrw(ldx, _us);
                 case_rrr(ldx, _i);
                 case_rrw(ldx, _i);
+            case jit_code_unldr:
+                unldr(rn(node->u.w), rn(node->v.w), node->w.w);
+                break;
+            case jit_code_unldi:
+                unldi(rn(node->u.w), node->v.w, node->w.w);
+                break;
+            case jit_code_unldr_u:
+                unldr_u(rn(node->u.w), rn(node->v.w), node->w.w);
+                break;
+            case jit_code_unldi_u:
+                unldi_u(rn(node->u.w), node->v.w, node->w.w);
+                break;
                 case_rr(st, _c);
                 case_wr(st, _c);
                 case_rr(st, _s);
@@ -1648,6 +1665,12 @@ _emit_code(jit_state_t *_jit)
                 case_wrr(stx, _s);
                 case_rrr(stx, _i);
                 case_wrr(stx, _i);
+            case jit_code_unstr:
+                unstr(rn(node->u.w), rn(node->v.w), node->w.w);
+                break;
+            case jit_code_unsti:
+                unsti(node->u.w, rn(node->v.w), node->w.w);
+                break;
                 case_rr(hton, _us);
                 case_rr(hton, _ui);
                 case_rr(bswap, _us);
@@ -1773,10 +1796,34 @@ _emit_code(jit_state_t *_jit)
                 case_vw(ld, _f);
                 case_vvv(ldx, _f);
                 case_vvw(ldx, _f);
+            case jit_code_unldr_x:
+                if (jit_swf_p())
+                    swf_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+                else
+                    vfp_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+                break;
+            case jit_code_unldi_x:
+                if (jit_swf_p())
+                    swf_unldi_x(rn(node->u.w), node->v.w, node->w.w);
+                else
+                    vfp_unldi_x(rn(node->u.w), node->v.w, node->w.w);
+                break;
                 case_vv(st, _f);
                 case_wv(st, _f);
                 case_vvv(stx, _f);
                 case_wvv(stx, _f);
+            case jit_code_unstr_x:
+                if (jit_swf_p())
+                    swf_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+                else
+                    vfp_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+                break;
+            case jit_code_unsti_x:
+                if (jit_swf_p())
+                    swf_unsti_x(node->u.w, rn(node->v.w), node->w.w);
+                else
+                    vfp_unsti_x(node->u.w, rn(node->v.w), node->w.w);
+                break;
                 case_vv(mov, _f);
             case jit_code_movi_f:
                 assert_data(node);
@@ -2062,41 +2109,34 @@ _emit_code(jit_state_t *_jit)
                 break;
             case jit_code_movr_w_f:
                 if (jit_swf_p())
-                    swf_movr_f(rn(node->u.w), rn(node->v.w));
+                    swf_movr_w_f(rn(node->u.w), rn(node->v.w));
                 else
-                    vfp_movr_f(rn(node->u.w), rn(node->v.w));
+                    vfp_movr_w_f(rn(node->u.w), rn(node->v.w));
                 break;
             case jit_code_movr_f_w:
                 if (jit_swf_p())
-                    swf_movr_f(rn(node->u.w), rn(node->v.w));
+                    swf_movr_f_w(rn(node->u.w), rn(node->v.w));
                 else
-                    vfp_movr_f(rn(node->u.w), rn(node->v.w));
+                    vfp_movr_f_w(rn(node->u.w), rn(node->v.w));
                 break;
             case jit_code_movi_f_w:
                 assert_data(node);
-                if (jit_swf_p())
-                    swf_movi_f(rn(node->u.w), node->v.f);
-                else
-                    vfp_movi_f(rn(node->u.w), node->v.f);
+                movi_f_w(rn(node->u.w), node->v.f);
                 break;
             case jit_code_movr_ww_d:
                 if (jit_swf_p())
-                    swf_movr_d(rn(node->u.w), rn(node->v.w));
+                    swf_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                 else
-                    vfp_movr_d(rn(node->u.w), rn(node->v.w));
+                    vfp_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                 break;
             case jit_code_movr_d_ww:
                 if (jit_swf_p())
-                    swf_movr_d(rn(node->u.w), rn(node->w.w));
+                    swf_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                 else
-                    vfp_movr_d(rn(node->u.w), rn(node->w.w));
+                    vfp_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                 break;
             case jit_code_movi_d_ww:
-                assert_data(node);
-                if (jit_swf_p())
-                    swf_movi_d(rn(node->u.w), node->w.d);
-                else
-                    vfp_movi_d(rn(node->u.w), node->w.d);
+                movi_d_ww(rn(node->u.w), rn(node->v.w), node->w.d);
                 break;
             case jit_code_va_start:
                 vastart(rn(node->u.w));