-rw-r--r-- | lib/jit_fallback.c | 194 | ||||
-rw-r--r-- | lib/jit_ia64-cpu.c | 52 | ||||
-rw-r--r-- | lib/jit_mips-cpu.c | 49 | ||||
-rw-r--r-- | lib/jit_ppc-cpu.c | 73 | ||||
-rw-r--r-- | lib/jit_ppc.c | 2 | ||||
-rw-r--r-- | lib/jit_s390-cpu.c | 58 | ||||
-rw-r--r-- | lib/jit_s390.c | 8 | ||||
-rw-r--r-- | lib/jit_sparc-cpu.c | 58 |
diff --git a/lib/jit_fallback.c b/lib/jit_fallback.c index cb593fa..21e2f42 100644 --- a/lib/jit_fallback.c +++ b/lib/jit_fallback.c @@ -1,5 +1,7 @@ #if PROTO #define USE_BIT_TABLES 1 +#define USE_BITSWAP_UNROLLED 0 +#define USE_BITSWAP_LOOP 0 #define fallback_save(r0) _fallback_save(_jit, r0) static void _fallback_save(jit_state_t*, jit_int32_t); #define fallback_load(r0) _fallback_load(_jit, r0) @@ -21,6 +23,8 @@ static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t); static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t); #define fallback_ctz(r0,r1) _fallback_ctz(_jit,r0,r1) static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t); +#define fallback_bitswap(r0,r1) _fallback_bitswap(_jit, r0, r1) +static void _fallback_bitswap(jit_state_t*, jit_int32_t, jit_int32_t); # if defined(__ia64__) # define fallback_flush() sync() # elif defined(__mips__) @@ -487,4 +491,194 @@ _fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(r1_reg); # endif } + +static void +_fallback_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +# if USE_BIT_TABLES + /* t0 = r1; + * t1 = t0 & 0xff; + * t2 = swap_tab; + * r0 = t2[t1]; + * t3 = 8; + * loop: + * t1 = t0 >> t3; + * t1 &= 0xff; + * r0 <<= 8; + * r0 |= t2[t1]; + * t3 += 8; + * if (t3 < __WORDSIZE) + * goto loop; + */ + jit_word_t loop; + jit_int32_t t0, r1_reg, t1, t2, t3; + static const unsigned char swap_tab[256] = { + 0, 128, 64, 192, 32, 160, 96, 224, + 16, 144, 80, 208, 48, 176, 112, 240, + 8, 136, 72, 200, 40, 168, 104, 232, + 24, 152, 88, 216 ,56, 184, 120, 248, + 4, 132, 68, 196, 36, 164, 100, 228, + 20, 148, 84, 212, 52, 180, 116, 244, + 12, 140, 76, 204, 44, 172, 108, 236, + 28, 156, 92, 220, 60, 188, 124, 252, + 2, 130, 66, 194, 34, 162, 98, 226, + 18, 146, 82, 210, 50, 178, 114, 242, + 10, 138, 74, 202, 42, 170, 106, 234, + 26, 154, 90, 218, 58, 186, 122, 250, + 6, 134, 70, 198, 38, 166, 102, 230, + 22, 150, 86, 214, 54, 182, 118, 246, + 14, 142, 78, 206, 46, 174, 110, 238, + 30, 158, 94, 222, 62, 190, 126, 254, + 1, 129, 65, 193, 33, 161, 97, 225, + 17, 145, 81, 209, 49, 177, 113, 241, + 9, 137, 73, 201, 41, 169, 105, 233, + 25, 153, 89, 217, 57, 185, 121, 249, + 5, 133, 69, 197, 37, 165, 101, 229, + 21, 149, 85, 213, 53, 181, 117, 245, + 13, 141, 77, 205, 45, 173, 109, 237, + 29, 157, 93, 221, 61, 189, 125, 253, + 3, 131, 67, 195, 35, 163, 99, 227, + 19, 147, 83, 211, 51, 179, 115, 243, + 11, 139, 75, 203, 43, 171, 107, 235, + 27, 155, 91, 219, 59, 187, 123, 251, + 7, 135, 71, 199, 39, 167, 103, 231, + 23, 151, 87, 215, 55, 183, 119, 247, + 15, 143, 79, 207, 47, 175, 111, 239, + 31, 159, 95, 223, 63, 191, 127, 255 + }; + if (r0 == r1) { + t0 = jit_get_reg(jit_class_gpr); + r1_reg = rn(t0); + } + else { + t0 = JIT_NOREG; + r1_reg = r1; + } + t1 = jit_get_reg(jit_class_gpr); + t2 = jit_get_reg(jit_class_gpr); + t3 = jit_get_reg(jit_class_gpr); + if (r0 == r1) + movr(rn(t0), r1); + extr_uc(rn(t1), r1_reg); + movi(rn(t2), (jit_word_t)swap_tab); + ldxr_uc(r0, rn(t2), rn(t1)); + movi(rn(t3), 8); + fallback_flush(); + loop = _jit->pc.w; + rshr(rn(t1), r1_reg, rn(t3)); + extr_uc(rn(t1), rn(t1)); + lshi(r0, r0, 8); + ldxr_uc(rn(t1), rn(t2), rn(t1)); + orr(r0, r0, rn(t1)); + addi(rn(t3), rn(t3), 8); + blti(loop, rn(t3), __WORDSIZE); + jit_unget_reg(t3); + jit_unget_reg(t2); + jit_unget_reg(t1); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); +# elif USE_BITSWAP_UNROLLED +/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ +/* +unsigned int v; // 32-bit word to reverse bit order + +// swap odd and even bits +v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); +// swap consecutive pairs +v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); +// swap nibbles ... +v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); +// swap bytes +v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); +// swap 2-byte long pairs +v = ( v >> 16 ) | ( v << 16); + */ + jit_int32_t t0, t1, t2, t3, t4; + movr(r0, r1); + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + t2 = jit_get_reg(jit_class_gpr); + movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); + rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); + rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); + rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL); + rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# if __WORDSIZE == 32 + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + lshi(rn(t2), r0, 16); /* t2 = v << 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# else + movi(rn(t0), 0x0000ffff0000ffffL); + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ + lshi(rn(t2), r0, 32); /* t2 = v << 32 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# endif + jit_unget_reg(t2); + jit_unget_reg(t1); + jit_unget_reg(t0); +# elif USE_BITSWAP_LOOP +/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ +/* +unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2 +unsigned int mask = ~0; +while ((s >>= 1) > 0) +{ + mask ^= (mask << s); + v = ((v >> s) & mask) | ((v << s) & ~mask); +} +*/ + jit_int32_t s, mask; + jit_word_t loop, done, t0, t1; + movr(v, r1); + s = jit_get_reg(jit_class_gpr); + movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */ + mask = jit_get_reg(jit_class_gpr); + movi(rn(mask), ~0L); /* mask = ~0; */ + flush(); + loop = _jit->pc.w; /* while ((s >>= 1) > 0) */ + rshi(rn(s), rn(s), 1); /* (s >>= 1) */ + done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */ + t0 = jit_get_reg(jit_class_gpr); + lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */ + xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */ + rshr(rn(t0), v, rn(s)); /* t0 = v >> s */ + andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */ + t1 = jit_get_reg(jit_class_gpr); + lshr(rn(t1), v, rn(s)); /* t1 = v << s */ + comr(v, rn(mask)); /* v = ~mask */ + andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */ + orr(v, rn(t0), rn(t1)); /* v = t0 | t1 */ + jmpi(loop, 0); + flush(); + patch_at(done, _jit->pc.w); + jit_unget_reg(t1); + jit_unget_reg(t0); + jit_unget_reg(mask); + jit_unget_reg(s); +# endif +} #endif diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 98a10c3..a337673 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -1301,8 +1301,6 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define nei(r0,r1,i0) _nei(_jit,r0,r1,i0) static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -#define bitswap(r0, r1) _bitswap(_jit, r0, r1) -static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t); #define clor(r0, r1) _clor(_jit, r0, r1) static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); #define clzr(r0, r1) _clzr(_jit, r0, r1) @@ -3476,52 +3474,6 @@ _nop(jit_state_t *_jit, jit_int32_t i0) } static void -_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0, t1, t2, t3, t4; - movr(r0, r1); - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); - rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); - rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); - rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL); - rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), 0x0000ffff0000ffffL); - rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ - lshi(rn(t2), r0, 32); /* t2 = v << 32 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} - -static void _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_cpu.clz) @@ -3545,7 +3497,7 @@ static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_cpu.clz) { - bitswap(r0, r1); + fallback_bitswap(r0, r1); clor(r0, r0); } else @@ -3556,7 +3508,7 @@ static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_cpu.clz) { - bitswap(r0, r1); + fallback_bitswap(r0, r1); clzr(r0, r0); } else diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 0e1f0ed..5d4137c 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -493,8 +493,6 @@ static void _nop(jit_state_t*,jit_int32_t); # define SELNEZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,55) # define comr(r0,r1) xori(r0,r1,-1) # define negr(r0,r1) subr(r0,_ZERO_REGNO,r1) -# define bitswap(r0,r1) _bitswap(_jit, r0, r1); -static void _bitswap(jit_state_t*,jit_int32_t,jit_int32_t); # define clor(r0, r1) _clor(_jit, r0, r1) static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); # define clzr(r0, r1) _clzr(_jit, r0, r1) @@ -1623,49 +1621,6 @@ _insr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, DINS(r0, r1, pos, size); } -/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ -/* -unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2 -unsigned int mask = ~0; -while ((s >>= 1) > 0) -{ - mask ^= (mask << s); - v = ((v >> s) & mask) | ((v << s) & ~mask); -} -*/ -static void -_bitswap(jit_state_t *_jit, jit_int32_t v, jit_int32_t r1) -{ - jit_int32_t s, mask; - jit_word_t loop, done, t0, t1; - movr(v, r1); - s = jit_get_reg(jit_class_gpr); - movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */ - mask = jit_get_reg(jit_class_gpr); - movi(rn(mask), ~0L); /* mask = ~0; */ - flush(); - loop = _jit->pc.w; /* while ((s >>= 1) > 0) */ - rshi(rn(s), rn(s), 1); /* (s >>= 1) */ - done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */ - t0 = jit_get_reg(jit_class_gpr); - lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */ - xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */ - rshr(rn(t0), v, rn(s)); /* t0 = v >> s */ - andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */ - t1 = jit_get_reg(jit_class_gpr); - lshr(rn(t1), v, rn(s)); /* t1 = v << s */ - comr(v, rn(mask)); /* v = ~mask */ - andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */ - orr(v, rn(t0), rn(t1)); /* v = t0 | t1 */ - jmpi(loop, 0); - flush(); - patch_at(done, _jit->pc.w); - jit_unget_reg(t1); - jit_unget_reg(t0); - jit_unget_reg(mask); - jit_unget_reg(s); -} - static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -1722,7 +1677,7 @@ _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #endif } else { - bitswap(r0, r1); + fallback_bitswap(r0, r1); clor(r0, r0); } } @@ -1746,7 +1701,7 @@ _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #endif } else { - bitswap(r0, r1); + fallback_bitswap(r0, r1); clzr(r0, r0); } } diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 67874c6..031f95d 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -533,8 +533,6 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, #define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) NOT(r0,r1) -# define bitswap(r0, r1) _bitswap(_jit, r0, r1) -static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t); # define clor(r0, r1) _clor(_jit, r0, r1) static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); # if __WORDSIZE == 32 @@ -1220,73 +1218,6 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_unget_reg(r1_reg); } -/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ -/* -unsigned int v; // 32-bit word to reverse bit order - -// swap odd and even bits -v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); -// swap consecutive pairs -v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); -// swap nibbles ... -v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); -// swap bytes -v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); -// swap 2-byte long pairs -v = ( v >> 16 ) | ( v << 16); - */ -static void -_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0, t1, t2, t3, t4; - movr(r0, r1); - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); - rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); - rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); - rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL); - rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# if __WORDSIZE == 32 - rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ - lshi(rn(t2), r0, 16); /* t2 = v << 16 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# else - movi(rn(t0), 0x0000ffff0000ffffL); - rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ - lshi(rn(t2), r0, 32); /* t2 = v << 32 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# endif - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} - static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -1297,14 +1228,14 @@ _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - bitswap(r0, r1); + fallback_bitswap(r0, r1); clor(r0, r0); } static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - bitswap(r0, r1); + fallback_bitswap(r0, r1); clzr(r0, r0); } diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 869e876..0ad4ae8 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -97,6 +97,7 @@ extern void __clear_cache(void *, void *); #define PROTO 1 # include "jit_ppc-cpu.c" # include "jit_ppc-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1926,6 +1927,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_ppc-cpu.c" # include "jit_ppc-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c index 2e9e074..0718938 100644 --- a/lib/jit_s390-cpu.c +++ b/lib/jit_s390-cpu.c @@ -1081,8 +1081,6 @@ static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # else # define negr(r0,r1) LCGR(r0,r1) # endif -# define bitswap(r0, r1) _bitswap(_jit, r0, r1) -static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t); # define clor(r0, r1) _clor(_jit, r0, r1) static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); # define clzr(r0, r1) _clzr(_jit, r0, r1) @@ -2994,58 +2992,6 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) #endif static void -_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0, t1, t2, t3, t4; - movr(r0, r1); - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); - rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); - rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); - rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL); - rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# if __WORDSIZE == 32 - rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ - lshi(rn(t2), r0, 16); /* t2 = v << 16 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# else - movi(rn(t0), 0x0000ffff0000ffffL); - rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ - lshi(rn(t2), r0, 32); /* t2 = v << 32 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# endif - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} - -static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { #if CHECK_FLOGR @@ -3097,7 +3043,7 @@ _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #if CHECK_FLOGR if (jit_cpu.flogr) { #endif - bitswap(r0, r1); + fallback_bitswap(r0, r1); clor(r0, r0); #if CHECK_FLOGR } @@ -3112,7 +3058,7 @@ _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #if CHECK_FLOGR if (jit_cpu.flogr) { #endif - bitswap(r0, r1); + fallback_bitswap(r0, r1); clzr(r0, r0); #if CHECK_FLOGR } diff --git a/lib/jit_s390.c b/lib/jit_s390.c index 6934b11..25c6421 100644 --- a/lib/jit_s390.c +++ b/lib/jit_s390.c @@ -94,9 +94,7 @@ extern void __clear_cache(void *, void *); #define PROTO 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" -# if CHECK_FLOGR -# include "jit_fallback.c" -# endif +# include "jit_fallback.c" #undef PROTO /* @@ -1675,9 +1673,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" -# if CHECK_FLOGR -# include "jit_fallback.c" -# endif +# include "jit_fallback.c" #undef CODE void diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index f4ce621..7e82e0f 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -573,8 +573,6 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, #define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0, r1) XNOR(r1, 0, r0) # define negr(r0, r1) NEG(r1, r0) -# define bitswap(r0, r1) _bitswap(_jit, r0, r1) -static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t); # define clor(r0, r1) _clor(_jit, r0, r1) static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); # define clzr(r0, r1) _clzr(_jit, r0, r1) @@ -1333,58 +1331,6 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, } static void -_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0, t1, t2, t3, t4; - movr(r0, r1); - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); - rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); - rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); - rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL); - rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# if __WORDSIZE == 32 - rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ - lshi(rn(t2), r0, 16); /* t2 = v << 16 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# else - movi(rn(t0), 0x0000ffff0000ffffL); - rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ - andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ - andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ - lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ - rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ - lshi(rn(t2), r0, 32); /* t2 = v << 32 */ - orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ -# endif - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} - -static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_cpu.lzcnt) { @@ -1419,7 +1365,7 @@ static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_cpu.lzcnt) { - bitswap(r0, r1); + fallback_bitswap(r0, r1); clor(r0, r0); } else @@ -1430,7 +1376,7 @@ static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_cpu.lzcnt) { - bitswap(r0, r1); + fallback_bitswap(r0, r1); clzr(r0, r0); } else |