lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
path: root/lib/jit_mips-cpu.c
diff options
context:
space:
mode:
authorpcpa <paulo.cesar.pereira.de.andrade@gmail.com>2023年04月05日 12:33:40 -0300
committerpcpa <paulo.cesar.pereira.de.andrade@gmail.com>2023年04月05日 12:33:40 -0300
commit7266e1ba30bd9469390b6272b1940255c444d302 (patch)
tree2a76aecaf63ba3a97c1c25ec686448e72a70e33e /lib/jit_mips-cpu.c
parent7f61b4beb3a4511eacc586161912b4124fbea237 (diff)
downloadlightning-7266e1ba30bd9469390b6272b1940255c444d302.tar.gz
mips: Implement unaligned memory access.
Note that Linux kernels should trap and handle unaligned memory access. For the moment, the library does not rely on this behavior by default. To change that, add the C code: jit_cpu.unaligned = 1; after calling init_jit(). Note that this is only an option if jit_cpu.release is 5 or lower, as MIPS release 6 and newer remove the unaligned memory access instructions. This has not yet been fully optimized for MIPS release 6. The logic should be to construct special instructions for smaller loads or stores and load/store the unaligned value without trapping.
Diffstat (limited to 'lib/jit_mips-cpu.c')
-rw-r--r--lib/jit_mips-cpu.c 307
1 files changed, 307 insertions, 0 deletions
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 20481f8..620acbb 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -68,6 +68,7 @@ typedef union {
} jit_instr_t;
#define jit_mips2_p() (jit_cpu.release >= 2)
#define jit_mips6_p() (jit_cpu.release >= 6)
+#define jit_unaligned_p() (jit_cpu.unaligned)
# define _ZERO_REGNO 0
# define _T0_REGNO 0x08
# define _T1_REGNO 0x09
@@ -180,6 +181,8 @@ typedef union {
# define MIPS_SH 0x29
# define MIPS_SWL 0x2a
# define MIPS_SW 0x2b
+# define MIPS_SDL 0x2c
+# define MIPS_SDR 0x2d
# define MIPS_SWR 0x2e
# define MIPS_CACHE 0x2f
# define MIPS_LL 0x30
@@ -437,20 +440,28 @@ static void _nop(jit_state_t*,jit_int32_t);
# define LWPC(rs,im) hriW(MIPS_PCREL,rs,1,im)
# define LWU(rt,of,rb) hrri(MIPS_LWU,rb,rt,of)
# define LWUPC(rs,im) hriW(MIPS_PCREL,rs,2,im)
+# define LWL(rt,of,rb) hrri(MIPS_LWL,rb,rt,of)
+# define LWR(rt,of,rb) hrri(MIPS_LWR,rb,rt,of)
# define LD(rt,of,rb) hrri(MIPS_LD,rb,rt,of)
# define LDPC(rs,im) hriD(MIPS_PCREL,rs,6,im)
# define LL(rt,of,rb) hrri(MIPS_LL,rb,rt,of)
# define LL_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,54)
# define LLD(rt,of,rb) hrri(MIPS_LLD,rb,rt,of)
# define LLD_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,55)
+# define LDL(rt,of,rb) hrri(MIPS_LDL,rb,rt,of)
+# define LDR(rt,of,rb) hrri(MIPS_LDR,rb,rt,of)
# define SB(rt,of,rb) hrri(MIPS_SB,rb,rt,of)
# define SH(rt,of,rb) hrri(MIPS_SH,rb,rt,of)
# define SW(rt,of,rb) hrri(MIPS_SW,rb,rt,of)
+# define SWL(rt,of,rb) hrri(MIPS_SWL,rb,rt,of)
+# define SWR(rt,of,rb) hrri(MIPS_SWR,rb,rt,of)
# define SD(rt,of,rb) hrri(MIPS_SD,rb,rt,of)
# define SC(rt,of,rb) hrri(MIPS_SC,rb,rt,of)
# define SC_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,38)
# define SCD(rt,of,rb) hrri(MIPS_SCD,rb,rt,of)
# define SCD_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,39)
+# define SDL(rt,of,rb) hrri(MIPS_SDL,rb,rt,of)
+# define SDR(rt,of,rb) hrri(MIPS_SDR,rb,rt,of)
# define WSBH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL)
# define SEB(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL)
# define SEH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL)
@@ -727,6 +738,14 @@ static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define ldxi_l(r0,r1,i0) _ldxi_l(_jit,r0,r1,i0)
static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# endif
+# define unldr(r0, r1, i0) _unldr(_jit, r0, r1, i0)
+static void _unldr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define unldi(r0, i0, i1) _unldi(_jit, r0, i0, i1)
+static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+# define unldr_u(r0, r1, i0) _unldr_u(_jit, r0, r1, i0)
+static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define unldi_u(r0, i0, i1) _unldi_u(_jit, r0, i0, i1)
+static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
# define str_c(r0,r1) SB(r1,0,r0)
# define sti_c(i0,r0) _sti_c(_jit,i0,r0)
static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
@@ -759,6 +778,10 @@ static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1)
static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# endif
+# define unstr(r0, r1, i0) _unstr(_jit, r0, r1, i0)
+static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define unsti(i0, r0, i1) _unsti(_jit, i0, r0, i1)
+static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
@@ -1417,6 +1440,8 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
case MIPS_ORI: /* 0d */
case MIPS_XORI: /* 0e */
case MIPS_DADDIU: /* 18 */
+ case MIPS_LDL: /* 1a */
+ case MIPS_LDR: /* 1b */
case MIPS_LB: /* 20 */
case MIPS_LH: /* 21 */
case MIPS_LW: /* 23 */
@@ -1434,6 +1459,21 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
regs[2] = 0;
}
break;
+ case MIPS_LWL: /* 22 */
+ case MIPS_LWR: /* 26 */
+ if (!jit_cpu.lwl_lwr_delay)
+ flush();
+ case MIPS_SWL: /* 2a */
+ case MIPS_SWR: /* 2e */
+ case MIPS_SDL: /* 2c */
+ case MIPS_SDR: /* 2d */
+ assert(!(jit_mips6_p()));
+ if (mask & jit_class_gpr) {
+ regs[0] = i.rs.b;
+ regs[1] = i.rt.b;
+ regs[2] = 0;
+ }
+ break;
case MIPS_LL: /* 30 */
case MIPS_LLD: /* 34 */
case MIPS_SC: /* 38 */
@@ -2256,16 +2296,19 @@ _xlshr(jit_state_t *_jit, jit_bool_t sign,
zero = beqi(_jit->pc.w, t3, 0);
over = beqi(_jit->pc.w, t3, __WORDSIZE);
done = jmpi(_jit->pc.w, 1);
+ flush();
patch_at(over, _jit->pc.w);
/* overflow */
movi(r0, 0);
done_over = jmpi(_jit->pc.w, 1);
/* zero */
+ flush();
patch_at(zero, _jit->pc.w);
if (sign)
rshi(r1, t2, __WORDSIZE - 1);
else
movi(r1, 0);
+ flush();
patch_at(done, _jit->pc.w);
patch_at(done_over, _jit->pc.w);
}
@@ -2355,6 +2398,7 @@ _xrshr(jit_state_t *_jit, jit_bool_t sign,
zero = beqi(_jit->pc.w, t3, 0);
over = beqi(_jit->pc.w, t3, __WORDSIZE);
done = jmpi(_jit->pc.w, 1);
+ flush();
patch_at(over, _jit->pc.w);
/* underflow */
if (sign)
@@ -2363,11 +2407,13 @@ _xrshr(jit_state_t *_jit, jit_bool_t sign,
movi(r0, 0);
done_over = jmpi(_jit->pc.w, 1);
/* zero */
+ flush();
patch_at(zero, _jit->pc.w);
if (sign)
rshi(r1, t2, __WORDSIZE - 1);
else
movi(r1, 0);
+ flush();
patch_at(done, _jit->pc.w);
patch_at(done_over, _jit->pc.w);
jit_unget_reg(s1);
@@ -2967,6 +3013,199 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
#endif
+#if __WORDSIZE == 32 /* pick the left/right partial-load macros matching the native word size */
+# define LOAD_LEFT LWL
+# define LOAD_RIGHT LWR
+#else
+# define LOAD_LEFT LDL
+# define LOAD_RIGHT LDR
+#endif
+static void /* Emit a load of i0 bytes from the (possibly unaligned) address in r1 into r0, using the signed narrowing helpers. */
+_unldr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* r0: destination register; r1: base address register; i0: byte count, asserted below to be 1..sizeof(jit_word_t) */
+ jit_word_t cross, done;
+ jit_int32_t t0, r2, t1, r3;
+ if (jit_unaligned_p()) {
+ assert(i0 >= 1 && i0 <= sizeof(jit_word_t));
+ if (i0 == 1) /* a single byte needs no special handling */
+ ldr_c(r0, r1);
+ else {
+ t0 = jit_get_reg(jit_class_gpr); r2 = rn(t0); /* scratch r2: holds the in-word byte offset, later the right-hand part */
+ t1 = jit_get_reg(jit_class_gpr); r3 = rn(t1); /* scratch r3: private copy of the address */
+ movr(r3, r1); /* NOTE(review): presumably copied so the address survives when r0 and r1 are the same register - confirm */
+ andi(r2, r1, sizeof(jit_word_t) - 1); /* r2 = byte offset of the address within its aligned word */
+ LOAD_LEFT(r0, 0, r3); /* left partial load at the start address. NOTE(review): offsets 0 (left) and word-1 (right) are the usual big-endian pairing; verify the little-endian case against the MIPS ISA manual */
+ cross = bgei(_jit->pc.w, r2, sizeof(jit_word_t) - (i0 - 1)); /* taken when offset + i0 > word size, i.e. the i0 bytes extend past this aligned word */
+ done = jmpi(_jit->pc.w, 1); /* otherwise skip the merge sequence below */
+ flush(); /* NOTE(review): flush() precedes every patch_at() here; presumably it commits a pending delay-slot instruction so pc.w is the real target - confirm */
+ patch_at(cross, _jit->pc.w); /* the "cross" branch lands here */
+ lshi(r2, r2, 3); /* byte offset -> bit count */
+ rshr(r0, r0, r2); /* shift right then left by that bit count ... */
+ lshr(r0, r0, r2); /* ... which clears the low r2 bits of r0 */
+ LOAD_RIGHT(r2, sizeof(jit_word_t) - 1, r3); /* right partial load at address + word size - 1, into the scratch register */
+ orr(r0, r0, r2); /* merge both parts */
+ flush();
+ patch_at(done, _jit->pc.w); /* both paths rejoin here */
+ switch (i0) { /* narrow the loaded word down to i0 bytes */
+ case 2:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ extr_s(r0, r0);
+#else
+ rshi(r0, r0, __WORDSIZE - 16); /* big-endian: bring the top 16 bits down */
+#endif
+ break;
+ case 3:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ lshi(r0, r0, __WORDSIZE - 24); /* little-endian only: move the low 24 bits to the top first */
+#endif
+ rshi(r0, r0, __WORDSIZE - 24); /* then shift back down by the same amount */
+ break;
+#if __WORDSIZE == 32
+ default: /* 32-bit build: the remaining size is a full word, no adjustment */
+#else
+ case 4:
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+ extr_i(r0, r0);
+# else
+ rshi(r0, r0, __WORDSIZE - 32);
+# endif
+#endif
+ break;
+#if __WORDSIZE == 64
+ case 5: /* cases 5..7 follow the same shift pattern as case 3 with wider fields */
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+ lshi(r0, r0, __WORDSIZE - 40);
+# endif
+ rshi(r0, r0, __WORDSIZE - 40);
+ break;
+ case 6:
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+ lshi(r0, r0, __WORDSIZE - 48);
+# endif
+ rshi(r0, r0, __WORDSIZE - 48);
+ break;
+ case 7:
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+ lshi(r0, r0, __WORDSIZE - 56);
+# endif
+ rshi(r0, r0, __WORDSIZE - 56);
+ break;
+ default: /* 64-bit build: the remaining size is a full word, no adjustment */
+ break;
+#endif
+ }
+ jit_unget_reg(t1); /* release the scratch registers in reverse order of acquisition */
+ jit_unget_reg(t0);
+ }
+ }
+ else
+ generic_unldr(r0, r1, i0); /* unaligned mode off: defer to generic_unldr, which is defined outside this view */
+}
+
+static void /* Immediate-operand counterpart of _unldr: only selects which out-of-view helper emits the load. */
+_unldi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{ /* NOTE(review): presumably i0 is the constant address and i1 the byte count, mirroring _unsti - confirm */
+ jit_int32_t t0, r2; /* NOTE(review): not referenced by name here; fallback_unldi/generic_unldi may be macros that use them - confirm before removing */
+ if (jit_unaligned_p())
+ fallback_unldi(r0, i0, i1); /* unaligned mode on: no inline left/right sequence for the immediate form */
+ else
+ generic_unldi(r0, i0, i1);
+}
+
+static void /* Emit a load of i0 bytes from the (possibly unaligned) address in r1 into r0, using the unsigned narrowing helpers. */
+_unldr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* r0: destination register; r1: base address register; i0: byte count, asserted below to be 1..sizeof(jit_word_t) */
+ jit_word_t cross, done;
+ jit_int32_t t0, r2, t1, r3;
+ if (jit_unaligned_p()) {
+ assert(i0 >= 1 && i0 <= sizeof(jit_word_t));
+ if (i0 == 1) /* a single byte needs no special handling */
+ ldr_uc(r0, r1);
+ else {
+ t0 = jit_get_reg(jit_class_gpr); r2 = rn(t0); /* scratch r2: holds the in-word byte offset, later the right-hand part */
+ t1 = jit_get_reg(jit_class_gpr); r3 = rn(t1); /* scratch r3: private copy of the address */
+ movr(r3, r1); /* NOTE(review): presumably copied so the address survives when r0 and r1 are the same register - confirm */
+ andi(r2, r1, sizeof(jit_word_t) - 1); /* r2 = byte offset of the address within its aligned word */
+ LOAD_LEFT(r0, 0, r3); /* left partial load at the start address. NOTE(review): offsets 0 (left) and word-1 (right) are the usual big-endian pairing; verify the little-endian case against the MIPS ISA manual */
+ cross = bgei(_jit->pc.w, r2, sizeof(jit_word_t) - (i0 - 1)); /* taken when offset + i0 > word size, i.e. the i0 bytes extend past this aligned word */
+ done = jmpi(_jit->pc.w, 1); /* otherwise skip the merge sequence below */
+ flush(); /* NOTE(review): flush() precedes every patch_at() here; presumably it commits a pending delay-slot instruction so pc.w is the real target - confirm */
+ patch_at(cross, _jit->pc.w); /* the "cross" branch lands here */
+ lshi(r2, r2, 3); /* byte offset -> bit count */
+ rshr(r0, r0, r2); /* shift right then left by that bit count ... */
+ lshr(r0, r0, r2); /* ... which clears the low r2 bits of r0 */
+ LOAD_RIGHT(r2, sizeof(jit_word_t) - 1, r3); /* right partial load at address + word size - 1, into the scratch register */
+ orr(r0, r0, r2); /* merge both parts */
+ flush();
+ patch_at(done, _jit->pc.w); /* both paths rejoin here */
+ switch (i0) { /* narrow the loaded word down to i0 bytes; differs from _unldr only in using extr_us/extr_ui/rshi_u */
+ case 2:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ extr_us(r0, r0);
+#else
+ rshi_u(r0, r0, __WORDSIZE - 16); /* big-endian: bring the top 16 bits down */
+#endif
+ break;
+ case 3:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ lshi(r0, r0, __WORDSIZE - 24); /* little-endian only: move the low 24 bits to the top first */
+#endif
+ rshi_u(r0, r0, __WORDSIZE - 24); /* then shift back down by the same amount */
+ break;
+#if __WORDSIZE == 32
+ default: /* 32-bit build: the remaining size is a full word, no adjustment */
+#else
+ case 4:
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+ extr_ui(r0, r0);
+# else
+ rshi_u(r0, r0, __WORDSIZE - 32);
+# endif
+#endif
+ break;
+#if __WORDSIZE == 64
+ case 5: /* cases 5..7 follow the same shift pattern as case 3 with wider fields */
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+ lshi(r0, r0, __WORDSIZE - 40);
+# endif
+ rshi_u(r0, r0, __WORDSIZE - 40);
+ break;
+ case 6:
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+ lshi(r0, r0, __WORDSIZE - 48);
+# endif
+ rshi_u(r0, r0, __WORDSIZE - 48);
+ break;
+ case 7:
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+ lshi(r0, r0, __WORDSIZE - 56);
+# endif
+ rshi_u(r0, r0, __WORDSIZE - 56);
+ break;
+ default: /* 64-bit build: the remaining size is a full word, no adjustment */
+ break;
+#endif
+ }
+ jit_unget_reg(t1); /* release the scratch registers in reverse order of acquisition */
+ jit_unget_reg(t0);
+ }
+ }
+ else
+ generic_unldr_u(r0, r1, i0); /* unaligned mode off: defer to generic_unldr_u, which is defined outside this view */
+}
+#undef LOAD_LEFT /* the word-size aliases are only needed by the two register-form load emitters */
+#undef LOAD_RIGHT
+
+static void /* Immediate-operand counterpart of _unldr_u: only selects which out-of-view helper emits the load. */
+_unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{ /* NOTE(review): presumably i0 is the constant address and i1 the byte count, mirroring _unsti - confirm */
+ jit_int32_t t0, r2; /* NOTE(review): not referenced by name here; fallback_unldi_u/generic_unldi_u may be macros that use them - confirm before removing */
+ if (jit_unaligned_p())
+ fallback_unldi_u(r0, i0, i1); /* unaligned mode on: no inline left/right sequence for the immediate form */
+ else
+ generic_unldi_u(r0, i0, i1);
+}
+
static void
_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
@@ -3124,6 +3363,74 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
#endif
static void /* Emit a store of i0 bytes of r1 to the (possibly unaligned) address in r0. */
+_unstr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* r0: base address register; r1: value register; i0: byte count */
+ jit_word_t cross, done;
+ jit_word_t t0, t1, r2, r3; /* NOTE(review): t1 and r3 are not referenced by name in this function, and the load emitters declare these names as jit_int32_t - confirm whether the helper macros need them */
+ assert(i0 > 0 && i0 <= sizeof(jit_word_t)); /* checked whether or not unaligned mode is on */
+ if (jit_unaligned_p()) {
+ switch (i0) { /* only full 4- and 8-byte stores get an inline left/right sequence */
+ case 4:
+ t0 = jit_get_reg(jit_class_gpr);
+ r2 = rn(t0); /* scratch register for the alignment test */
+ SWL(r1, 0, r0); /* left partial store at the start address. NOTE(review): offsets 0 (left) and 3 (right) are the usual big-endian pairing; verify the little-endian case against the MIPS ISA manual */
+ andi(r2, r0, 3); /* r2 = address modulo 4 */
+ cross = bnei(_jit->pc.w, r2, 0); /* taken when the address is not 4-byte aligned */
+ done = jmpi(_jit->pc.w, 0); /* aligned: skip the right partial store. NOTE(review): the load emitters pass 1 as the second jmpi argument - confirm the difference is intended */
+ flush();
+ patch_at(cross, _jit->pc.w);
+ SWR(r1, 3, r0); /* right partial store at address + 3 */
+ flush();
+ patch_at(done, _jit->pc.w);
+ jit_unget_reg(t0);
+ break;
+#if __WORDSIZE == 64
+ case 8: /* same sequence as case 4 with the doubleword instructions and an 8-byte alignment test */
+ t0 = jit_get_reg(jit_class_gpr);
+ r2 = rn(t0);
+ SDL(r1, 0, r0);
+ andi(r2, r0, 7); /* r2 = address modulo 8 */
+ cross = bnei(_jit->pc.w, r2, 0); /* taken when the address is not 8-byte aligned */
+ done = jmpi(_jit->pc.w, 0);
+ flush();
+ patch_at(cross, _jit->pc.w);
+ SDR(r1, 7, r0); /* right partial store at address + 7 */
+ flush();
+ patch_at(done, _jit->pc.w);
+ jit_unget_reg(t0);
+ break;
+#endif
+ default:
+ /* Cost of loading memory contents, creating masks, and'ing,
+ * and or'ing values to use SW* or SD* is larger than using
+ * fallback. */
+ fallback_unstr(r0, r1, i0);
+ break;
+ }
+ }
+ else
+ generic_unstr(r0, r1, i0); /* unaligned mode off: defer to generic_unstr, which is defined outside this view */
+}
+
+static void /* Emit a store of i1 bytes of r0 to the constant address i0. */
+_unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{ /* i0: immediate address; r0: value register; i1: byte count */
+ jit_int32_t reg;
+ if (jit_unaligned_p()) {
+ if (i1 == 4 || i1 == 8) { /* the sizes unstr can handle inline. NOTE(review): on a 32-bit build i1 == 8 would presumably trip unstr's i0 <= sizeof(jit_word_t) assert - confirm callers never pass it */
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0); /* materialize the address in a scratch register */
+ unstr(rn(reg), r0, i1); /* then reuse the register-form emitter */
+ jit_unget_reg(reg);
+ }
+ else
+ fallback_unsti(i0, r0, i1); /* other sizes: unstr would use its fallback anyway, so call the immediate fallback directly */
+ }
+ else
+ generic_unsti(i0, r0, i1); /* unaligned mode off: defer to generic_unsti, which is defined outside this view */
+}
+
+static void
_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_mips2_p()) {
generated by cgit v1.2.3 (git 2.25.1) at 2025年09月10日 23:50:30 +0000

AltStyle によって変換されたページ (->オリジナル) /