lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat
-rw-r--r--lib/jit_fallback.c 194
-rw-r--r--lib/jit_ia64-cpu.c 52
-rw-r--r--lib/jit_mips-cpu.c 49
-rw-r--r--lib/jit_ppc-cpu.c 73
-rw-r--r--lib/jit_ppc.c 2
-rw-r--r--lib/jit_s390-cpu.c 58
-rw-r--r--lib/jit_s390.c 8
-rw-r--r--lib/jit_sparc-cpu.c 58
8 files changed, 208 insertions, 286 deletions
diff --git a/lib/jit_fallback.c b/lib/jit_fallback.c
index cb593fa..21e2f42 100644
--- a/lib/jit_fallback.c
+++ b/lib/jit_fallback.c
@@ -1,5 +1,7 @@
#if PROTO
#define USE_BIT_TABLES 1
+#define USE_BITSWAP_UNROLLED 0
+#define USE_BITSWAP_LOOP 0
#define fallback_save(r0) _fallback_save(_jit, r0)
static void _fallback_save(jit_state_t*, jit_int32_t);
#define fallback_load(r0) _fallback_load(_jit, r0)
@@ -21,6 +23,8 @@ static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t);
static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t);
#define fallback_ctz(r0,r1) _fallback_ctz(_jit,r0,r1)
static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_bitswap(r0,r1) _fallback_bitswap(_jit, r0, r1)
+static void _fallback_bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
# if defined(__ia64__)
# define fallback_flush() sync()
# elif defined(__mips__)
@@ -487,4 +491,194 @@ _fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
jit_unget_reg(r1_reg);
# endif
}
+
+/*
+ * Emit code that stores in r0 the value of r1 with its bit order reversed
+ * (bit 0 is exchanged with bit __WORDSIZE-1, bit 1 with bit __WORDSIZE-2,
+ * and so on).  r0 and r1 may name the same register.
+ *
+ * Exactly one code generator is compiled in: the first one enabled among
+ * USE_BIT_TABLES, USE_BITSWAP_UNROLLED and USE_BITSWAP_LOOP.  If none is
+ * enabled the function emits nothing.
+ */
+static void
+_fallback_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+# if USE_BIT_TABLES
+ /* Byte-at-a-time reversal through a 256 entry lookup table:
+ * t0 = r1;
+ * t1 = t0 & 0xff;
+ * t2 = swap_tab;
+ * r0 = t2[t1];
+ * t3 = 8;
+ * loop:
+ * t1 = t0 >> t3;
+ * t1 &= 0xff;
+ * r0 <<= 8;
+ * r0 |= t2[t1];
+ * t3 += 8;
+ * if (t3 < __WORDSIZE)
+ * goto loop;
+ */
+ jit_word_t loop;
+ jit_int32_t t0, r1_reg, t1, t2, t3;
+ /* swap_tab[i] is the byte i with its eight bits reversed. The table is
+ * static because its address is embedded in the generated code. */
+ static const unsigned char swap_tab[256] = {
+ 0, 128, 64, 192, 32, 160, 96, 224,
+ 16, 144, 80, 208, 48, 176, 112, 240,
+ 8, 136, 72, 200, 40, 168, 104, 232,
+ 24, 152, 88, 216, 56, 184, 120, 248,
+ 4, 132, 68, 196, 36, 164, 100, 228,
+ 20, 148, 84, 212, 52, 180, 116, 244,
+ 12, 140, 76, 204, 44, 172, 108, 236,
+ 28, 156, 92, 220, 60, 188, 124, 252,
+ 2, 130, 66, 194, 34, 162, 98, 226,
+ 18, 146, 82, 210, 50, 178, 114, 242,
+ 10, 138, 74, 202, 42, 170, 106, 234,
+ 26, 154, 90, 218, 58, 186, 122, 250,
+ 6, 134, 70, 198, 38, 166, 102, 230,
+ 22, 150, 86, 214, 54, 182, 118, 246,
+ 14, 142, 78, 206, 46, 174, 110, 238,
+ 30, 158, 94, 222, 62, 190, 126, 254,
+ 1, 129, 65, 193, 33, 161, 97, 225,
+ 17, 145, 81, 209, 49, 177, 113, 241,
+ 9, 137, 73, 201, 41, 169, 105, 233,
+ 25, 153, 89, 217, 57, 185, 121, 249,
+ 5, 133, 69, 197, 37, 165, 101, 229,
+ 21, 149, 85, 213, 53, 181, 117, 245,
+ 13, 141, 77, 205, 45, 173, 109, 237,
+ 29, 157, 93, 221, 61, 189, 125, 253,
+ 3, 131, 67, 195, 35, 163, 99, 227,
+ 19, 147, 83, 211, 51, 179, 115, 243,
+ 11, 139, 75, 203, 43, 171, 107, 235,
+ 27, 155, 91, 219, 59, 187, 123, 251,
+ 7, 135, 71, 199, 39, 167, 103, 231,
+ 23, 151, 87, 215, 55, 183, 119, 247,
+ 15, 143, 79, 207, 47, 175, 111, 239,
+ 31, 159, 95, 223, 63, 191, 127, 255
+ };
+ /* r0 is written before the last read of the source, so when both
+ * arguments are the same register the source is copied to a temporary. */
+ if (r0 == r1) {
+ t0 = jit_get_reg(jit_class_gpr);
+ r1_reg = rn(t0);
+ }
+ else {
+ t0 = JIT_NOREG;
+ r1_reg = r1;
+ }
+ t1 = jit_get_reg(jit_class_gpr);
+ t2 = jit_get_reg(jit_class_gpr);
+ t3 = jit_get_reg(jit_class_gpr);
+ if (r0 == r1)
+ movr(rn(t0), r1);
+ extr_uc(rn(t1), r1_reg); /* t1 = lowest byte of the source */
+ movi(rn(t2), (jit_word_t)swap_tab); /* t2 = table base address */
+ ldxr_uc(r0, rn(t2), rn(t1)); /* r0 = swap_tab[t1] */
+ movi(rn(t3), 8); /* t3 = shift count of next byte */
+ fallback_flush();
+ loop = _jit->pc.w; /* backward branch target */
+ rshr(rn(t1), r1_reg, rn(t3)); /* t1 = source >> t3 */
+ extr_uc(rn(t1), rn(t1)); /* t1 &= 0xff */
+ lshi(r0, r0, 8); /* make room for the next byte */
+ ldxr_uc(rn(t1), rn(t2), rn(t1)); /* t1 = swap_tab[t1] */
+ orr(r0, r0, rn(t1)); /* r0 |= t1 */
+ addi(rn(t3), rn(t3), 8); /* t3 += 8 */
+ blti(loop, rn(t3), __WORDSIZE); /* repeat until every byte is done */
+ jit_unget_reg(t3);
+ jit_unget_reg(t2);
+ jit_unget_reg(t1);
+ if (t0 != JIT_NOREG)
+ jit_unget_reg(t0);
+# elif USE_BITSWAP_UNROLLED
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int v; // 32-bit word to reverse bit order
+
+// swap odd and even bits
+v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+// swap consecutive pairs
+v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+// swap nibbles ...
+v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
+// swap bytes
+v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
+// swap 2-byte long pairs
+v = ( v >> 16 ) | ( v << 16);
+ */
+ /* r0 plays the role of v; t0 holds the current mask, t1 and t2 the
+ * two halves that are recombined at each step. */
+ jit_int32_t t0, t1, t2;
+ movr(r0, r1);
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ t2 = jit_get_reg(jit_class_gpr);
+ movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+ rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+ rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+ rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
+ rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# if __WORDSIZE == 32
+ /* Last step on 32 bit words: exchange the two halves, no mask needed. */
+ rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+ lshi(rn(t2), r0, 16); /* t2 = v << 16 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# else
+ /* 64 bit words need one more masked step before the final exchange. */
+ movi(rn(t0), 0x0000ffff0000ffffL);
+ rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
+ lshi(rn(t2), r0, 32); /* t2 = v << 32 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# endif
+ jit_unget_reg(t2);
+ jit_unget_reg(t1);
+ jit_unget_reg(t0);
+# elif USE_BITSWAP_LOOP
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2
+unsigned int mask = ~0;
+while ((s >>= 1) > 0)
+{
+ mask ^= (mask << s);
+ v = ((v >> s) & mask) | ((v << s) & ~mask);
+}
+*/
+ /* r0 plays the role of v in the reference code above. */
+ jit_int32_t s, mask, t0, t1;
+ jit_word_t loop, done;
+ movr(r0, r1);
+ s = jit_get_reg(jit_class_gpr);
+ movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */
+ mask = jit_get_reg(jit_class_gpr);
+ movi(rn(mask), ~0L); /* mask = ~0; */
+ /* Scratch registers are acquired before the loop label so that no
+ * register bookkeeping happens in the middle of the loop body. */
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ fallback_flush();
+ loop = _jit->pc.w; /* while ((s >>= 1) > 0) */
+ rshi(rn(s), rn(s), 1); /* (s >>= 1) */
+ done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */
+ lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */
+ xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */
+ rshr(rn(t0), r0, rn(s)); /* t0 = v >> s */
+ andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */
+ lshr(rn(t1), r0, rn(s)); /* t1 = v << s */
+ comr(r0, rn(mask)); /* v = ~mask */
+ andr(rn(t1), r0, rn(t1)); /* t1 = t1 & v */
+ orr(r0, rn(t0), rn(t1)); /* v = t0 | t1 */
+ /* NOTE(review): the two argument jmpi() form comes from the MIPS back
+ * end this branch was moved from; confirm it before enabling
+ * USE_BITSWAP_LOOP on another target. */
+ jmpi(loop, 0);
+ fallback_flush();
+ patch_at(done, _jit->pc.w); /* forward branch lands here */
+ jit_unget_reg(t1);
+ jit_unget_reg(t0);
+ jit_unget_reg(mask);
+ jit_unget_reg(s);
+# endif
+}
#endif
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
index 98a10c3..a337673 100644
--- a/lib/jit_ia64-cpu.c
+++ b/lib/jit_ia64-cpu.c
@@ -1301,8 +1301,6 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define nei(r0,r1,i0) _nei(_jit,r0,r1,i0)
static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#define bitswap(r0, r1) _bitswap(_jit, r0, r1)
-static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
#define clor(r0, r1) _clor(_jit, r0, r1)
static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
#define clzr(r0, r1) _clzr(_jit, r0, r1)
@@ -3476,52 +3474,6 @@ _nop(jit_state_t *_jit, jit_int32_t i0)
}
static void
-_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0, t1, t2, t3, t4;
- movr(r0, r1);
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
- rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
- rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
- rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
- rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), 0x0000ffff0000ffffL);
- rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
- lshi(rn(t2), r0, 32); /* t2 = v << 32 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- jit_unget_reg(t2);
- jit_unget_reg(t1);
- jit_unget_reg(t0);
-}
-
-static void
_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_cpu.clz)
@@ -3545,7 +3497,7 @@ static void
_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_cpu.clz) {
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clor(r0, r0);
}
else
@@ -3556,7 +3508,7 @@ static void
_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_cpu.clz) {
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clzr(r0, r0);
}
else
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 0e1f0ed..5d4137c 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -493,8 +493,6 @@ static void _nop(jit_state_t*,jit_int32_t);
# define SELNEZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,55)
# define comr(r0,r1) xori(r0,r1,-1)
# define negr(r0,r1) subr(r0,_ZERO_REGNO,r1)
-# define bitswap(r0,r1) _bitswap(_jit, r0, r1);
-static void _bitswap(jit_state_t*,jit_int32_t,jit_int32_t);
# define clor(r0, r1) _clor(_jit, r0, r1)
static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
# define clzr(r0, r1) _clzr(_jit, r0, r1)
@@ -1623,49 +1621,6 @@ _insr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
DINS(r0, r1, pos, size);
}
-/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
-/*
-unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2
-unsigned int mask = ~0;
-while ((s >>= 1) > 0)
-{
- mask ^= (mask << s);
- v = ((v >> s) & mask) | ((v << s) & ~mask);
-}
-*/
-static void
-_bitswap(jit_state_t *_jit, jit_int32_t v, jit_int32_t r1)
-{
- jit_int32_t s, mask;
- jit_word_t loop, done, t0, t1;
- movr(v, r1);
- s = jit_get_reg(jit_class_gpr);
- movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */
- mask = jit_get_reg(jit_class_gpr);
- movi(rn(mask), ~0L); /* mask = ~0; */
- flush();
- loop = _jit->pc.w; /* while ((s >>= 1) > 0) */
- rshi(rn(s), rn(s), 1); /* (s >>= 1) */
- done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */
- t0 = jit_get_reg(jit_class_gpr);
- lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */
- xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */
- rshr(rn(t0), v, rn(s)); /* t0 = v >> s */
- andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */
- t1 = jit_get_reg(jit_class_gpr);
- lshr(rn(t1), v, rn(s)); /* t1 = v << s */
- comr(v, rn(mask)); /* v = ~mask */
- andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */
- orr(v, rn(t0), rn(t1)); /* v = t0 | t1 */
- jmpi(loop, 0);
- flush();
- patch_at(done, _jit->pc.w);
- jit_unget_reg(t1);
- jit_unget_reg(t0);
- jit_unget_reg(mask);
- jit_unget_reg(s);
-}
-
static void
_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
@@ -1722,7 +1677,7 @@ _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
#endif
}
else {
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clor(r0, r0);
}
}
@@ -1746,7 +1701,7 @@ _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
#endif
}
else {
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clzr(r0, r0);
}
}
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
index 67874c6..031f95d 100644
--- a/lib/jit_ppc-cpu.c
+++ b/lib/jit_ppc-cpu.c
@@ -533,8 +533,6 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define negr(r0,r1) NEG(r0,r1)
# define comr(r0,r1) NOT(r0,r1)
-# define bitswap(r0, r1) _bitswap(_jit, r0, r1)
-static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
# define clor(r0, r1) _clor(_jit, r0, r1)
static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
# if __WORDSIZE == 32
@@ -1220,73 +1218,6 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_unget_reg(r1_reg);
}
-/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
-/*
-unsigned int v; // 32-bit word to reverse bit order
-
-// swap odd and even bits
-v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
-// swap consecutive pairs
-v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
-// swap nibbles ...
-v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
-// swap bytes
-v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
-// swap 2-byte long pairs
-v = ( v >> 16 ) | ( v << 16);
- */
-static void
-_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0, t1, t2, t3, t4;
- movr(r0, r1);
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
- rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
- rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
- rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
- rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# if __WORDSIZE == 32
- rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
- lshi(rn(t2), r0, 16); /* t2 = v << 16 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# else
- movi(rn(t0), 0x0000ffff0000ffffL);
- rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
- lshi(rn(t2), r0, 32); /* t2 = v << 32 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# endif
- jit_unget_reg(t2);
- jit_unget_reg(t1);
- jit_unget_reg(t0);
-}
-
static void
_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
@@ -1297,14 +1228,14 @@ _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
static void
_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clor(r0, r0);
}
static void
_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clzr(r0, r0);
}
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
index 869e876..0ad4ae8 100644
--- a/lib/jit_ppc.c
+++ b/lib/jit_ppc.c
@@ -97,6 +97,7 @@ extern void __clear_cache(void *, void *);
#define PROTO 1
# include "jit_ppc-cpu.c"
# include "jit_ppc-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
@@ -1926,6 +1927,7 @@ _emit_code(jit_state_t *_jit)
#define CODE 1
# include "jit_ppc-cpu.c"
# include "jit_ppc-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c
index 2e9e074..0718938 100644
--- a/lib/jit_s390-cpu.c
+++ b/lib/jit_s390-cpu.c
@@ -1081,8 +1081,6 @@ static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# else
# define negr(r0,r1) LCGR(r0,r1)
# endif
-# define bitswap(r0, r1) _bitswap(_jit, r0, r1)
-static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
# define clor(r0, r1) _clor(_jit, r0, r1)
static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
# define clzr(r0, r1) _clzr(_jit, r0, r1)
@@ -2994,58 +2992,6 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
#endif
static void
-_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0, t1, t2, t3, t4;
- movr(r0, r1);
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
- rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
- rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
- rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
- rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# if __WORDSIZE == 32
- rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
- lshi(rn(t2), r0, 16); /* t2 = v << 16 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# else
- movi(rn(t0), 0x0000ffff0000ffffL);
- rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
- lshi(rn(t2), r0, 32); /* t2 = v << 32 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# endif
- jit_unget_reg(t2);
- jit_unget_reg(t1);
- jit_unget_reg(t0);
-}
-
-static void
_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
#if CHECK_FLOGR
@@ -3097,7 +3043,7 @@ _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
#if CHECK_FLOGR
if (jit_cpu.flogr) {
#endif
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clor(r0, r0);
#if CHECK_FLOGR
}
@@ -3112,7 +3058,7 @@ _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
#if CHECK_FLOGR
if (jit_cpu.flogr) {
#endif
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clzr(r0, r0);
#if CHECK_FLOGR
}
diff --git a/lib/jit_s390.c b/lib/jit_s390.c
index 6934b11..25c6421 100644
--- a/lib/jit_s390.c
+++ b/lib/jit_s390.c
@@ -94,9 +94,7 @@ extern void __clear_cache(void *, void *);
#define PROTO 1
# include "jit_s390-cpu.c"
# include "jit_s390-fpu.c"
-# if CHECK_FLOGR
-# include "jit_fallback.c"
-# endif
+# include "jit_fallback.c"
#undef PROTO
/*
@@ -1675,9 +1673,7 @@ _emit_code(jit_state_t *_jit)
#define CODE 1
# include "jit_s390-cpu.c"
# include "jit_s390-fpu.c"
-# if CHECK_FLOGR
-# include "jit_fallback.c"
-# endif
+# include "jit_fallback.c"
#undef CODE
void
diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c
index f4ce621..7e82e0f 100644
--- a/lib/jit_sparc-cpu.c
+++ b/lib/jit_sparc-cpu.c
@@ -573,8 +573,6 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define comr(r0, r1) XNOR(r1, 0, r0)
# define negr(r0, r1) NEG(r1, r0)
-# define bitswap(r0, r1) _bitswap(_jit, r0, r1)
-static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
# define clor(r0, r1) _clor(_jit, r0, r1)
static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
# define clzr(r0, r1) _clzr(_jit, r0, r1)
@@ -1333,58 +1331,6 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
}
static void
-_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0, t1, t2, t3, t4;
- movr(r0, r1);
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
- rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
- rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
- rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
- rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# if __WORDSIZE == 32
- rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
- lshi(rn(t2), r0, 16); /* t2 = v << 16 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# else
- movi(rn(t0), 0x0000ffff0000ffffL);
- rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
- andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
- andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
- lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
- rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
- lshi(rn(t2), r0, 32); /* t2 = v << 32 */
- orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
-# endif
- jit_unget_reg(t2);
- jit_unget_reg(t1);
- jit_unget_reg(t0);
-}
-
-static void
_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_cpu.lzcnt) {
@@ -1419,7 +1365,7 @@ static void
_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_cpu.lzcnt) {
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clor(r0, r0);
}
else
@@ -1430,7 +1376,7 @@ static void
_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_cpu.lzcnt) {
- bitswap(r0, r1);
+ fallback_bitswap(r0, r1);
clzr(r0, r0);
}
else
generated by cgit v1.2.3 (git 2.39.1) at 2025年09月18日 03:59:45 +0000

AltStyle によって変換されたページ (->オリジナル) /