author | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023-02-26 10:00:53 -0300 |
---|---|---|
committer | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023-02-26 10:00:53 -0300 |
commit | b38c2bf82beb9dbe5efaa53b63b6b316d5a02c4f (patch) | |
tree | 9051c902cc093b7d77a67f0ace4762ecec66d8fe | |
parent | 346d3458456e775eedf64af3aa6c6d9816b8f8fd (diff) | |
download | lightning-b38c2bf82beb9dbe5efaa53b63b6b316d5a02c4f.tar.gz |
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | check/bit.tst | 2 | ||||
-rw-r--r-- | include/lightning/jit_mips.h | 5 | ||||
-rw-r--r-- | lib/jit_fallback.c | 6 | ||||
-rw-r--r-- | lib/jit_mips-cpu.c | 116 | ||||
-rw-r--r-- | lib/jit_mips.c | 23 |
@@ -1,3 +1,17 @@ +2023年02月26日 Paulo Andrade <pcpa@gnu.org> + + * check/bit.tst: Correct 32 bit sample ctz implementation. + * include/lightning/jit_mips.h: Add jit_cpu flags for instructions + that cannot be used in delay slot. + * lib/jit_fallback.c: Mips fallbacks now might need a flush of + instructions to get correct label addresses, due to pending + instruction candidate to delay slot. + * lib/jit_mips-cpu.c: Flush any pending instruction if it cannot + be used in the delay slot. Add calls to fallback clo, clz, cto and + ctz for mips 1. + * lib/jit_mips.c: Add code to set defaults or detect if can use + certain instructions to delay slots. + 2023年02月23日 Paulo Andrade <pcpa@gnu.org> * include/lightning/jit_private.h: Add new 'inst' field to diff --git a/check/bit.tst b/check/bit.tst index b721d5c..2d2344c 100644 --- a/check/bit.tst +++ b/check/bit.tst @@ -250,8 +250,8 @@ ctz: bnei tun %r1 0 reti __WORDSIZE tun: -#if __WORDSIZE == 64 movi %r0 0 +#if __WORDSIZE == 64 movi %r2 0xffffffff bmsr t32 %r1 %r2 rshi_u %r1 %r1 32 diff --git a/include/lightning/jit_mips.h b/include/lightning/jit_mips.h index 52aebcc..8b5cb69 100644 --- a/include/lightning/jit_mips.h +++ b/include/lightning/jit_mips.h @@ -118,6 +118,11 @@ typedef enum { typedef struct { jit_uint32_t release : 4; + /* set if lwc1, ldc1, swc1, sdc1, mtc1, mfc1, dmtc1, and dmfc1 + * can be put in delay slot */ + jit_uint32_t cop1_delay : 1; + /* set if sll can be put in delay slot */ + jit_uint32_t sll_delay : 1; } jit_cpu_t; /* diff --git a/lib/jit_fallback.c b/lib/jit_fallback.c index 2f7f214..d8b260f 100644 --- a/lib/jit_fallback.c +++ b/lib/jit_fallback.c @@ -31,6 +31,12 @@ static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t); # endif # if defined(__arm__) # define fallback_patch_at(inst,lbl) patch_at(arm_patch_jump,inst,lbl) +# elif defined(__mips__) +# define fallback_patch_at(inst,lbl) \ + do { \ + flush(); \ + patch_at(inst, lbl); \ + } while (0); # elif defined(__ia64__) # define 
fallback_patch_at(inst,lbl) \ do { \ diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 1af6406..5dfa7e4 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -1030,6 +1030,9 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask, } break; case MIPS_SLL: /* 00 */ + /* If cannot have a shift in delay slot */ + if (!jit_cpu.sll_delay) + flush(); case MIPS_SRL: /* 02 */ case MIPS_SRA: /* 03 */ case MIPS_DSLL: /* 38 */ @@ -1208,6 +1211,10 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask, case MIPS_DMT: /* 05 */ case MIPS_MTH: /* 07 */ assert(i.ic.b == 0); + /* If these cop1 instructions in delay slot + * wont work */ + if (!jit_cpu.cop1_delay == 0) + flush(); if (mask & jit_class_gpr) { regs[0] = i.rt.b; regs[1] = regs[2] = 0; @@ -1405,13 +1412,24 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask, regs[1] = regs[2] = 0; } break; - case MIPS_BEQ: /* 04 */ - case MIPS_BNE: /* 05 */ - assert(i.rt.b == 0); case MIPS_LWC1: /* 31 */ case MIPS_LDC1: /* 35 */ case MIPS_SWC1: /* 39 */ case MIPS_SDC1: /* 3d */ + /* If these cop1 instructions in delay wont not work */ + if (!jit_cpu.cop1_delay == 0) + flush(); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = 0; + } + else + regs[0] = i.rt.b; + break; + case MIPS_BEQ: /* 04 */ + case MIPS_BNE: /* 05 */ + assert(i.rt.b == 0); if (mask & jit_class_gpr) { regs[0] = i.rs.b; regs[1] = i.rt.b; @@ -1646,73 +1664,89 @@ _bitswap(jit_state_t *_jit, jit_int32_t v, jit_int32_t r1) static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { + if (jit_mips2_p()) { #if __WORDSIZE == 32 - if (jit_mips6_p()) - CLO_R6(r0, r1); - else - CLO(r0, r1); + if (jit_mips6_p()) + CLO_R6(r0, r1); + else + CLO(r0, r1); #else - if (jit_mips6_p()) - DCLO_R6(r0, r1); - else - DCLO(r0, r1); + if (jit_mips6_p()) + DCLO_R6(r0, r1); + else + DCLO(r0, r1); #endif + } + else + fallback_clo(r0, r1); } static void _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1) { + if (jit_mips2_p()) { #if __WORDSIZE == 32 - if (jit_mips6_p()) - CLZ_R6(r0, r1); - else - CLZ(r0, r1); + if (jit_mips6_p()) + CLZ_R6(r0, r1); + else + CLZ(r0, r1); #else - if (jit_mips6_p()) - DCLZ_R6(r0, r1); - else - DCLZ(r0, r1); + if (jit_mips6_p()) + DCLZ_R6(r0, r1); + else + DCLZ(r0, r1); #endif + } + else + fallback_clz(r0, r1); } static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - if (jit_mips6_p()) { + if (jit_mips2_p()) { + if (jit_mips6_p()) { #if __WORDSIZE == 32 - BITSWAP(r0, r1); - bswapr_ui(r0, r0); - CLO_R6(r0, r0); + BITSWAP(r0, r1); + bswapr_ui(r0, r0); + CLO_R6(r0, r0); #else - DBITSWAP(r0, r1); - bswapr_ul(r0, r0); - DCLO_R6(r0, r0); + DBITSWAP(r0, r1); + bswapr_ul(r0, r0); + DCLO_R6(r0, r0); #endif + } + else { + bitswap(r0, r1); + clor(r0, r0); + } } - else { - bitswap(r0, r1); - clor(r0, r0); - } + else + fallback_cto(r0, r1); } static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - if (jit_mips6_p()) { + if (jit_mips2_p()) { + if (jit_mips6_p()) { #if __WORDSIZE == 32 - BITSWAP(r0, r1); - bswapr_ui(r0, r0); - CLZ_R6(r0, r0); + BITSWAP(r0, r1); + bswapr_ui(r0, r0); + CLZ_R6(r0, r0); #else - DBITSWAP(r0, r1); - bswapr_ul(r0, r0); - DCLZ_R6(r0, r0); + DBITSWAP(r0, r1); + bswapr_ul(r0, r0); + DCLZ_R6(r0, r0); #endif + } + else { + bitswap(r0, r1); + clzr(r0, r0); + } } - else { - bitswap(r0, r1); - clzr(r0, r0); - } + else + fallback_ctz(r0, r1); } static void diff --git a/lib/jit_mips.c b/lib/jit_mips.c index b73ed61..55bc8ee 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -193,13 +193,26 @@ jit_get_cpu(void) char *ptr; char buf[128]; + /* By default assume it works. 
*/ + jit_cpu.sll_delay = jit_cpu.cop1_delay = 1; if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) { while (fgets(buf, sizeof(buf), fp)) { - if (strncmp(buf, "isa : ", 8) == 0) { + if (strncmp(buf, "isa\t\t\t: ", 8) == 0) { if ((ptr = strstr(buf + 9, "mips64r"))) jit_cpu.release = strtoul(ptr + 7, NULL, 10); break; } + /* Just for some actual hardware tested. Below check + * for mips 1 would disable these delays anyway. */ + if (strncmp(buf, "cpu model\t\t: ", 13) == 0) { + /* ICT Loongson-2 V0.3 FPU V0.1 */ + if (strstr(buf + 13, "FPU V0.1")) + jit_cpu.sll_delay = jit_cpu.cop1_delay = 0; + /* Cavium Octeon III V0.2 FPU V0.0 */ + else if (strstr(buf + 13, "FPU V0.0")) + jit_cpu.sll_delay = jit_cpu.cop1_delay = 0; + break; + } } fclose(fp); } @@ -214,6 +227,14 @@ jit_get_cpu(void) if (!jit_cpu.release) jit_cpu.release = __mips; #endif + /* Assume all mips 1, or detected as release 1 has this problem */ + /* Note that jit_cpu is global, and can be overriden, that is, add + * the C code "jit_cpu.cop1_delay = 1;" after the call to init_jit() + * if it is functional. */ + if (jit_cpu.cop1_delay && jit_cpu.release < 2) + jit_cpu.cop1_delay = 0; + if (jit_cpu.sll_delay && jit_cpu.release < 2) + jit_cpu.sll_delay = 0; } void |