-rw-r--r-- | lib/jit_mips-cpu.c | 222 | ||||
-rw-r--r-- | lib/jit_mips-sz.c | 86 | ||||
-rw-r--r-- | lib/jit_mips.c | 8 |
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 58f17af..20481f8 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -623,6 +623,26 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # endif /* Quad-shift wrappers added below: each q*sh* macro forwards to the shared x*sh* emitter, passing 1 for the plain form and 0 for the _u form as the sign flag. */ +# define qlshr(r0,r1,r2,r3) xlshr(1,r0,r1,r2,r3) +# define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3) +# define xlshr(s,r0,r1,r2,r3) _xlshr(_jit,s,r0,r1,r2,r3) +static void +_xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0) +# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0) +# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0) +static void +_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3) +# define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3) +# define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3) +static void +_xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0) +# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0) +# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0) +static void +_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); # define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define lroti(r0,r1,i0) rroti(r0,r1,__WORDSIZE-i0) @@ -2198,6 +2218,208 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) #endif static void /* _xlshr: emit a double-result left shift of r2 by the count held in r3. r0 receives lshr(r2, r3); r1 receives rshr (sign set) or rshr_u (sign clear) of r2 by the value that rsbi(t0, t3, __WORDSIZE) leaves in t0 (presumably __WORDSIZE - r3; confirm against rsbi). r2 and r3 are copied into scratch registers when they alias r0 or r1, because both outputs are written before the inputs are last read. Counts of 0 and __WORDSIZE are patched up afterwards, with branches when no extra scratch register was obtained and with movzr/movnr otherwise. */ +_xlshr(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_bool_t branch; + jit_word_t over, zero, done, done_over; + jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3; + s0 = jit_get_reg(jit_class_gpr); + t0 = 
rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; /* Ask for one more scratch register with the nospill and chk class flags; when the request evaluates false the branching fix-up sequence is used instead of the branchless one. */ + if ((s1 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) { + t1 = rn(s1); + branch = 0; + } + else + branch = 1; + rsbi(t0, t3, __WORDSIZE); + lshr(r0, t2, t3); + if (sign) + rshr(r1, t2, t0); + else + rshr_u(r1, t2, t0); + if (branch) { + zero = beqi(_jit->pc.w, t3, 0); + over = beqi(_jit->pc.w, t3, __WORDSIZE); + done = jmpi(_jit->pc.w, 1); + patch_at(over, _jit->pc.w); + /* overflow */ + movi(r0, 0); + done_over = jmpi(_jit->pc.w, 1); + /* zero */ + patch_at(zero, _jit->pc.w); + if (sign) + rshi(r1, t2, __WORDSIZE - 1); + else + movi(r1, 0); + patch_at(done, _jit->pc.w); + patch_at(done_over, _jit->pc.w); + } + else { + if (sign) + rshi(t0, t2, __WORDSIZE - 1); + else + movi(t0, 0); + /* zero? */ + movzr(r1, t0, t3); + /* Branchless but 4 bytes longer than branching fallback */ + if (sign) + movi(t0, 0); + /* overflow? 
*/ + eqi(t1, t3, __WORDSIZE); + movnr(r0, t0, t1); + jit_unget_reg(s1); + } + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + /* _xlshi: immediate-count form of the double-result left shift. i0 == 0 copies r2 to r0 and sets r1 to rshi(r2, __WORDSIZE - 1) when sign is set, else 0. i0 == __WORDSIZE moves r2 to r1 and zeroes r0. Any other count is asserted to be at most __WORDSIZE (the jit_uword_t cast presumably makes negative counts fail as well) and the two shifts are emitted directly. NOTE(review): in the general case r1 is written before r2 is read for r0, and there is no scratch copy as in _xlshr; confirm callers never pass r1 == r2. */ +static void +_xlshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + if (sign) + rshi(r1, r2, __WORDSIZE - 1); + else + movi(r1, 0); + } + else if (i0 == __WORDSIZE) { + movr(r1, r2); + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + if (sign) + rshi(r1, r2, __WORDSIZE - i0); + else + rshi_u(r1, r2, __WORDSIZE - i0); + lshi(r0, r2, i0); + } +} + /* _xrshr: emit a double-result right shift of r2 by the count held in r3. r0 receives rshr (sign set) or rshr_u (sign clear) of r2 by r3; r1 receives lshr of r2 by the value that rsbi(t0, t3, __WORDSIZE) leaves in t0. Inputs that alias r0 or r1 are copied to scratch registers first. Counts of 0 and __WORDSIZE are patched up afterwards, with branches or with movzr/movnr depending on whether s1 was obtained. */ +static void +_xrshr(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_bool_t branch; + jit_word_t over, zero, done, done_over; + jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3; + s0 = jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + if ((s1 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) { + t1 = rn(s1); + branch = 0; + } + else + branch = 1; + rsbi(t0, t3, __WORDSIZE); + if (sign) + rshr(r0, t2, t3); + else + rshr_u(r0, t2, t3); + lshr(r1, t2, t0); + if (branch) { + zero = beqi(_jit->pc.w, t3, 0); + over = beqi(_jit->pc.w, t3, __WORDSIZE); + done = jmpi(_jit->pc.w, 1); + patch_at(over, _jit->pc.w); + /* underflow */ + if (sign) + rshi(r0, t2, __WORDSIZE - 1); + else + movi(r0, 0); + done_over = jmpi(_jit->pc.w, 1); + /* zero */ + patch_at(zero, _jit->pc.w); + if (sign) + rshi(r1, t2, __WORDSIZE - 1); + else + movi(r1, 0); + patch_at(done, _jit->pc.w); + patch_at(done_over, _jit->pc.w); /* NOTE(review): this path runs only when the s1 request above evaluated false (branch == 1), and the matching path in _xlshr releases nothing here; confirm that releasing s1 is intended. */ + jit_unget_reg(s1); + } + else { + /* zero? */ + if (sign) + rshi(t0, t2, __WORDSIZE - 1); + else + movi(t0, 0); + movzr(r1, t0, t3); + /* underflow? 
*/ + eqi(t1, t3, __WORDSIZE); + movnr(r0, t0, t1); + jit_unget_reg(s1); + } + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + /* _xrshi: immediate-count form of the double-result right shift. i0 == 0 copies r2 to r0 and sets r1 to rshi(r2, __WORDSIZE - 1) when sign is set, else 0. i0 == __WORDSIZE moves r2 to r1 and sets r0 to that same sign fill or 0. Any other count is asserted to be at most __WORDSIZE, then lshi writes r1 and rshi or rshi_u writes r0. NOTE(review): in the general case r1 is written before r2 is read for r0; confirm callers never pass r1 == r2. */ +static void +_xrshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + if (sign) + rshi(r1, r2, __WORDSIZE - 1); + else + movi(r1, 0); + } + else if (i0 == __WORDSIZE) { + movr(r1, r2); + if (sign) + rshi(r0, r2, __WORDSIZE - 1); + else + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + lshi(r1, r2, __WORDSIZE - i0); + if (sign) + rshi(r0, r2, i0); + else + rshi_u(r0, r2, i0); + } +} + +static void _lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t reg; diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index e1c944b..169ae93 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -456,9 +456,46 @@ 4, /* lroti */ 4, /* rrotr */ 4, /* rroti */ - 8, /* ext */ - 4, /* ext_u */ - 4, /* dep */ + 8, /* extr */ + 4, /* extr_u */ + 4, /* depr */ + 8, /* depi */ + 4, /* negi */ + 4, /* comi */ + 4, /* exti_c */ + 4, /* exti_uc */ + 4, /* exti_s */ + 4, /* exti_us */ + 0, /* exti_i */ + 0, /* exti_ui */ + 4, /* bswapi_us */ + 8, /* bswapi_ui */ + 0, /* bswapi_ul */ + 4, /* htoni_us */ + 8, /* htoni_ui */ + 0, /* htoni_ul */ + 0, /* negi_f */ + 0, /* absi_f */ + 0, /* sqrti_f */ + 0, /* negi_d */ + 0, /* absi_d */ + 0, /* sqrti_d */ + 4, /* cloi */ + 4, /* clzi */ + 4, /* ctoi */ + 4, /* ctzi */ + 8, /* rbiti */ + 4, /* popcnti */ + 4, /* exti */ + 4, /* exti_u */ + 48, /* qlshr */ + 8, /* qlshi */ + 44, /* qlshr_u */ + 8, /* qlshi_u */ + 44, /* qrshr */ + 8, /* qrshi */ + 44, /* qrshr_u */ + 8, /* qrshi_u */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -918,7 +955,44 @@ 4, /* lroti */ 4, /* rrotr */ 4, /* rroti */ - 8, /* ext */ - 4, /* ext_u */ - 4, /* dep */ + 8, /* extr */ + 4, /* extr_u */ + 4, /* depr */ + 8, /* depi */ + 4, /* negi */ + 4, /* comi 
*/ + 4, /* exti_c */ + 4, /* exti_uc */ + 4, /* exti_s */ + 4, /* exti_us */ + 4, /* exti_i */ + 8, /* exti_ui */ + 4, /* bswapi_us */ + 8, /* bswapi_ui */ + 28, /* bswapi_ul */ + 4, /* htoni_us */ + 8, /* htoni_ui */ + 28, /* htoni_ul */ + 0, /* negi_f */ + 0, /* absi_f */ + 0, /* sqrti_f */ + 0, /* negi_d */ + 0, /* absi_d */ + 0, /* sqrti_d */ + 4, /* cloi */ + 4, /* clzi */ + 4, /* ctoi */ + 4, /* ctzi */ + 28, /* rbiti */ + 4, /* popcnti */ + 4, /* exti */ + 4, /* exti_u */ + 48, /* qlshr */ + 8, /* qlshi */ + 44, /* qlshr_u */ + 8, /* qlshi_u */ + 44, /* qrshr */ + 8, /* qrshi */ + 44, /* qrshr_u */ + 8, /* qrshi_u */ #endif /* __WORDSIZE */ diff --git a/lib/jit_mips.c b/lib/jit_mips.c index d1bf1ab..59c33e5 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -1556,10 +1556,18 @@ _emit_code(jit_state_t *_jit) case_rrw(rem, _u); case_rrr(lsh,); case_rrw(lsh,); /* The added entries for qlsh here and qrsh further down each get a case_rrrr and a case_rrrw line, for both the plain and the _u variant. */ + case_rrrr(qlsh,); + case_rrrw(qlsh,); + case_rrrr(qlsh, _u); + case_rrrw(qlsh, _u); case_rrr(rsh,); case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrrr(qrsh,); + case_rrrw(qrsh,); + case_rrrr(qrsh, _u); + case_rrrw(qrsh, _u); case_rrr(lrot,); case_rrw(lrot,); case_rrr(rrot,); |