Avoid expensive bit reverse for count of trailing zeros or ones - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
author pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-03-09 09:32:03 -0300
committer pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-03-09 09:32:03 -0300
commit a78fda56dcf723fcdd6cb032c394a0330d4fe36f (patch)
tree 8087e99c7a1aac69cc08e8cd22bb070a93e9e9bf
parent 9ad57b7a90803b95a2c822e259cfb7113a38a72c (diff)
download lightning-a78fda56dcf723fcdd6cb032c394a0330d4fe36f.tar.gz
Avoid expensive bit reverse for count of trailing zeros or ones
Diffstat
-rw-r--r-- lib/jit_ia64-cpu.c 16
-rw-r--r-- lib/jit_ppc-cpu.c 16
-rw-r--r-- lib/jit_s390-cpu.c 16
-rw-r--r-- lib/jit_sparc-cpu.c 16
4 files changed, 48 insertions, 16 deletions
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
index 9e3cee1..1c89029 100644
--- a/lib/jit_ia64-cpu.c
+++ b/lib/jit_ia64-cpu.c
@@ -3498,8 +3498,8 @@ static void
_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_cpu.clz) {
- fallback_bitswap(r0, r1);
- clor(r0, r0);
+ comr(r0, r1);
+ ctzr(r0, r0);
}
else
fallback_cto(r0, r1);
@@ -3508,9 +3508,17 @@ _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
static void
_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ jit_int32_t t0, t1;
if (jit_cpu.clz) {
- fallback_bitswap(r0, r1);
- clzr(r0, r0);
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ negr(rn(t0), r1);
+ andr(rn(t0), rn(t0), r1);
+ clzr(r0, rn(t0));
+ xori(rn(t1), r0, __WORDSIZE - 1);
+ movnr(r0, rn(t1), rn(t0));
+ jit_unget_reg(t0);
+ jit_unget_reg(t1);
}
else
fallback_ctz(r0, r1);
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
index 4de46b3..26b14fb 100644
--- a/lib/jit_ppc-cpu.c
+++ b/lib/jit_ppc-cpu.c
@@ -1242,15 +1242,23 @@ _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
static void
_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- fallback_bitswap(r0, r1);
- clor(r0, r0);
+ comr(r0, r1);
+ ctzr(r0, r0);
}
static void
_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- fallback_bitswap(r0, r1);
- clzr(r0, r0);
+ jit_int32_t t0, t1;
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ negr(rn(t0), r1);
+ andr(rn(t0), rn(t0), r1);
+ clzr(r0, rn(t0));
+ xori(rn(t1), r0, __WORDSIZE - 1);
+ movnr(r0, rn(t1), rn(t0));
+ jit_unget_reg(t0);
+ jit_unget_reg(t1);
}
static void
diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c
index d8b29f5..8ff6e48 100644
--- a/lib/jit_s390-cpu.c
+++ b/lib/jit_s390-cpu.c
@@ -3066,8 +3066,8 @@ _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
#if CHECK_FLOGR
if (jit_cpu.flogr) {
#endif
- fallback_bitswap(r0, r1);
- clor(r0, r0);
+ comr(r0, r1);
+ ctzr(r0, r0);
#if CHECK_FLOGR
}
else
@@ -3078,11 +3078,19 @@ _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
static void
_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ jit_int32_t t0, t1;
#if CHECK_FLOGR
if (jit_cpu.flogr) {
#endif
- fallback_bitswap(r0, r1);
- clzr(r0, r0);
+ t0 = jit_get_reg_but_zero(0);
+ t1 = jit_get_reg_but_zero(0);
+ negr(rn(t0), r1);
+ andr(rn(t0), rn(t0), r1);
+ clzr(r0, rn(t0));
+ xori(rn(t1), r0, __WORDSIZE - 1);
+ movnr(r0, rn(t1), rn(t0));
+ jit_unget_reg(t0);
+ jit_unget_reg(t1);
#if CHECK_FLOGR
}
else
diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c
index 420ef6c..770f145 100644
--- a/lib/jit_sparc-cpu.c
+++ b/lib/jit_sparc-cpu.c
@@ -1366,8 +1366,8 @@ static void
_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (jit_cpu.lzcnt) {
- fallback_bitswap(r0, r1);
- clor(r0, r0);
+ comr(r0, r1);
+ ctzr(r0, r0);
}
else
fallback_cto(r0, r1);
@@ -1376,9 +1376,17 @@ _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
static void
_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ jit_int32_t t0, t1;
if (jit_cpu.lzcnt) {
- fallback_bitswap(r0, r1);
- clzr(r0, r0);
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ negr(rn(t0), r1);
+ andr(rn(t0), rn(t0), r1);
+ clzr(r0, rn(t0));
+ xori(rn(t1), r0, __WORDSIZE - 1);
+ movnr(r0, rn(t1), rn(t0));
+ jit_unget_reg(t0);
+ jit_unget_reg(t1);
}
else
fallback_ctz(r0, r1);
generated by cgit v1.2.3 (git 2.39.1) at 2025-09-13 08:04:42 +0000

AltStyle によって変換されたページ (->オリジナル) /