lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
author pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2024年01月31日 18:24:36 -0300
committer pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2024年01月31日 18:24:36 -0300
commit a03782b2df5835e1735472c8a142baa6cb32ef02 (patch)
tree 07f61bfcf1fe1dd8355406ab92a1423f81f26b9f
parent 6045d67ff68fe8238a9bb7017eeef6abeeeaaa2c (diff)
download lightning-a03782b2df5835e1735472c8a142baa6cb32ef02.tar.gz
x86: Implement optimized post increment load/store
It is only available for a few register combinations and assumes DF is using default/standard value of 0. A new check/cldstxba.c test has been added to validate all valid combinations for x86*, as these cannot be achieved easily with the check/lightning test driver.
Diffstat
-rw-r--r--check/Makefile.am 5
-rw-r--r--check/cldstxba.c 153
-rw-r--r--lib/jit_x86-cpu.c 218
-rw-r--r--lib/jit_x86.c 14
4 files changed, 379 insertions, 11 deletions
diff --git a/check/Makefile.am b/check/Makefile.am
index 4d41d87..a6fa7a9 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -18,7 +18,7 @@ AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
-D_GNU_SOURCE $(LIGHTNING_CFLAGS)
check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \
- catomic protect riprel cbit callee
+ catomic protect riprel cbit callee cldstxba
lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
lightning_SOURCES = lightning.c
@@ -67,6 +67,9 @@ cbit_SOURCES = cbit.c
callee_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
callee_SOURCES = callee.c
+cldstxba_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+cldstxba_SOURCES = cldstxba.c
+
noinst_PROGRAMS = gen_cbit
gen_cbit_SOURCES = gen_cbit.c
diff --git a/check/cldstxba.c b/check/cldstxba.c
new file mode 100644
index 0000000..e7f4c0d
--- /dev/null
+++ b/check/cldstxba.c
@@ -0,0 +1,153 @@
+#include <lightning.h>
+#include <stdio.h>
+
+#if !defined(offsetof)
+# define offsetof(type, field) ((char *)&((type *)0)->field - (char *)0)
+#endif
+/* Test driver: on x86 builds, JIT-emits jit_stxai_* and jit_ldxai_* calls on _RDI/_RAX and _RAX/_RSI, branches to a fail label that calls abort on any mismatch, and otherwise prints "ok". */
+int
+main(int argc, char *argv[])
+{
+#if defined(__x86_64__) || defined(__i386__)
+ jit_state_t *_jit;
+ jit_node_t *jmp, *fail;
+ void (*code)(void);
+ struct data_t {
+ signed char sc;
+ unsigned char uc;
+ signed short ss;
+ unsigned short us;
+ signed int si;
+ unsigned int ui;
+ unsigned long ul;
+ } data;
+ /* sc, ss, si (and ul on 64-bit builds) are written by the jitted stores; uc, us, ui are assigned directly from C below. */
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+ jit_prolog();
+ fail = jit_forward(); /* forward label; every failed check branches here */
+ /* Byte store of SC_VAL through _RDI; _RSI keeps the start address so the code can check _RDI moved by 1. */
+#define SC_VAL -3
+ jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, sc));
+ jit_movi(_RAX, SC_VAL);
+ jit_movr(_RSI, _RDI);
+ jit_stxai_c(1, _RDI, _RAX);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, 1);
+ jit_patch_at(jmp, fail);
+ data.uc = 0xa3; /* plain C assignment, done while building the code; the jitted ldxai_uc check below compares against it */
+ /* 16-bit store: _RDI must have moved by 2. */
+#define SS_VAL -31
+ jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, ss));
+ jit_movi(_RAX, SS_VAL);
+ jit_movr(_RSI, _RDI);
+ jit_stxai_s(2, _RDI, _RAX);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, 2);
+ jit_patch_at(jmp, fail);
+ data.us = 0x5aa5;
+ /* 32-bit store: _RDI must have moved by 4. */
+#define SI_VAL -511
+ jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, si));
+ jit_movi(_RAX, SI_VAL);
+ jit_movr(_RSI, _RDI);
+ jit_stxai_i(4, _RDI, _RAX);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, 4);
+ jit_patch_at(jmp, fail);
+ data.ui = 0xabcddcba;
+ /* 64-bit store, only when __X64 && !__X64_32: _RDI must have moved by 8. */
+# if __X64 && !__X64_32
+#define UL_VAL 0x123456789abcdef
+ jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, ul));
+ jit_movi(_RAX, UL_VAL);
+ jit_movr(_RSI, _RDI);
+ jit_stxai_l(8, _RDI, _RAX);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, 8);
+ jit_patch_at(jmp, fail);
+# endif
+ /* Loads: _RDI keeps the start address while _RSI is the base given to ldxai, so _RDI - _RSI must equal minus the access size. */
+ jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, sc));
+ jit_movr(_RDI, _RSI);
+ jit_ldxai_c(_RAX, _RSI, 1);
+ jmp = jit_bnei(_RAX, SC_VAL); /* loaded value must match what stxai_c stored */
+ jit_patch_at(jmp, fail);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, -1);
+ jit_patch_at(jmp, fail);
+ jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, uc));
+ jit_movr(_RDI, _RSI);
+ jit_ldxai_uc(_RAX, _RSI, 1);
+ jmp = jit_bnei(_RAX, data.uc); /* data.uc is read here in C and becomes an immediate in the emitted compare */
+ jit_patch_at(jmp, fail);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, -1);
+ jit_patch_at(jmp, fail);
+ jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ss));
+ jit_movr(_RDI, _RSI);
+ jit_ldxai_s(_RAX, _RSI, 2);
+ jmp = jit_bnei(_RAX, SS_VAL);
+ jit_patch_at(jmp, fail);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, -2);
+ jit_patch_at(jmp, fail);
+ jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, us));
+ jit_movr(_RDI, _RSI);
+ jit_ldxai_us(_RAX, _RSI, 2);
+ jmp = jit_bnei(_RAX, data.us);
+ jit_patch_at(jmp, fail);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, -2);
+ jit_patch_at(jmp, fail);
+ jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, si));
+ jit_movr(_RDI, _RSI);
+ jit_ldxai_i(_RAX, _RSI, 4);
+ jmp = jit_bnei(_RAX, SI_VAL);
+ jit_patch_at(jmp, fail);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, -4);
+ jit_patch_at(jmp, fail);
+# if __X64 && !__X64_32
+ jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ui));
+ jit_movr(_RDI, _RSI);
+ jit_ldxai_ui(_RAX, _RSI, 4);
+ jmp = jit_bnei(_RAX, data.ui);
+ jit_patch_at(jmp, fail);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, -4);
+ jit_patch_at(jmp, fail);
+ jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ul));
+ jit_movr(_RDI, _RSI);
+ jit_ldxai_l(_RAX, _RSI, 8);
+ jmp = jit_bnei(_RAX, UL_VAL);
+ jit_patch_at(jmp, fail);
+ jit_subr(_RDI, _RDI, _RSI);
+ jmp = jit_bnei(_RDI, -8);
+ jit_patch_at(jmp, fail);
+# endif
+ /* Success path jumps over the abort call placed at the fail label, then calls puts("ok"). */
+ jmp = jit_jmpi();
+ jit_link(fail);
+ jit_calli(abort);
+ jit_patch(jmp);
+ jit_prepare();
+ {
+ jit_pushargi((jit_word_t)"ok");
+ }
+ jit_finishi(puts);
+ jit_ret();
+ jit_epilog();
+ code = jit_emit();
+ jit_clear_state();
+
+ (*code)(); /* run the generated checks */
+
+ jit_destroy_state();
+ finish_jit();
+#else
+ puts("ok"); /* other architectures: nothing to test, report success */
+#endif
+ return (0);
+}
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index 126fca6..6957adf 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -570,6 +570,45 @@ static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# endif
# endif
+/* ldxb*: no x86-specific emitter; every form forwards to the generic code. */
+# define ldxbr_c(r0, r1, r2) generic_ldxbr_c(r0, r1, r2)
+# define ldxbi_c(r0, r1, i0) generic_ldxbi_c(r0, r1, i0)
+# define ldxbr_uc(r0, r1, r2) generic_ldxbr_uc(r0, r1, r2)
+# define ldxbi_uc(r0, r1, i0) generic_ldxbi_uc(r0, r1, i0)
+# define ldxbr_s(r0, r1, r2) generic_ldxbr_s(r0, r1, r2)
+# define ldxbi_s(r0, r1, i0) generic_ldxbi_s(r0, r1, i0)
+# define ldxbr_us(r0, r1, r2) generic_ldxbr_us(r0, r1, r2)
+# define ldxbi_us(r0, r1, i0) generic_ldxbi_us(r0, r1, i0)
+# define ldxbr_i(r0, r1, r2) generic_ldxbr_i(r0, r1, r2)
+# define ldxbi_i(r0, r1, i0) generic_ldxbi_i(r0, r1, i0)
+# if __X64 && !__X64_32
+# define ldxbr_ui(r0, r1, r2) generic_ldxbr_ui(r0, r1, r2)
+# define ldxbi_ui(r0, r1, i0) generic_ldxbi_ui(r0, r1, i0)
+# define ldxbr_l(r0, r1, r2) generic_ldxbr_l(r0, r1, r2)
+# define ldxbi_l(r0, r1, i0) generic_ldxbi_l(r0, r1, i0)
+# endif
+/* ldxa*: register-offset forms stay generic; immediate forms use the x86 emitters declared here. */
+# define ldxar_c(r0, r1, r2) generic_ldxar_c(r0, r1, r2)
+# define ldxai_c(r0, r1, i0) _ldxai_c(_jit, r0, r1, i0)
+static void _ldxai_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define ldxar_uc(r0, r1, r2) generic_ldxar_uc(r0, r1, r2)
+# define ldxai_uc(r0, r1, i0) _ldxai_uc(_jit, r0, r1, i0)
+static void _ldxai_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define ldxar_s(r0, r1, r2) generic_ldxar_s(r0, r1, r2)
+# define ldxai_s(r0, r1, i0) _ldxai_s(_jit, r0, r1, i0)
+static void _ldxai_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define ldxar_us(r0, r1, r2) generic_ldxar_us(r0, r1, r2)
+# define ldxai_us(r0, r1, i0) _ldxai_us(_jit, r0, r1, i0)
+static void _ldxai_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define ldxar_i(r0, r1, r2) generic_ldxar_i(r0, r1, r2)
+# define ldxai_i(r0, r1, i0) _ldxai_i(_jit, r0, r1, i0)
+static void _ldxai_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# if __X64 && !__X64_32
+# define ldxar_ui(r0, r1, r2) generic_ldxar_ui(r0, r1, r2)
+# define ldxai_ui(r0, r1, i0) _ldxai_ui(_jit, r0, r1, i0)
+static void _ldxai_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define ldxar_l(r0, r1, r2) generic_ldxar_l(r0, r1, r2)
+# define ldxai_l(r0, r1, i0) _ldxai_l(_jit, r0, r1, i0)
+static void _ldxai_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# endif
# define unldr(r0, r1, i0) generic_unldr(r0, r1, i0)
# define unldi(r0, i0, i1) generic_unldi(r0, i0, i1)
# define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0)
@@ -610,8 +649,34 @@ static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1)
static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
# endif
-#define unstr(r0, r1, i0) generic_unstr(r0, r1, i0)
-#define unsti(i0, r0, i1) generic_unsti(i0, r0, i1)
+# define stxbr_c(r0, r1, r2) generic_stxbr_c(r0, r1, r2) /* stxb*: every form forwards to the generic code */
+# define stxbi_c(i0, r0, r1) generic_stxbi_c(i0, r0, r1)
+# define stxbr_s(r0, r1, r2) generic_stxbr_s(r0, r1, r2)
+# define stxbi_s(i0, r0, r1) generic_stxbi_s(i0, r0, r1)
+# define stxbr_i(r0, r1, r2) generic_stxbr_i(r0, r1, r2)
+# define stxbi_i(i0, r0, r1) generic_stxbi_i(i0, r0, r1)
+# if __X64 && !__X64_32
+# define stxbr_l(r0, r1, r2) generic_stxbr_l(r0, r1, r2)
+# define stxbi_l(i0, r0, r1) generic_stxbi_l(i0, r0, r1)
+# endif
+/* stxa*: register-offset forms stay generic; immediate forms use the x86 emitters declared below. */
+# define stxar_c(r0, r1, r2) generic_stxar_c(r0, r1, r2)
+# define stxai_c(i0, r0, r1) _stxai_c(_jit, i0, r0, r1)
+static void _stxai_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+# define stxar_s(r0, r1, r2) generic_stxar_s(r0, r1, r2)
+# define stxai_s(i0, r0, r1) _stxai_s(_jit, i0, r0, r1)
+static void _stxai_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+# define stxar_i(r0, r1, r2) generic_stxar_i(r0, r1, r2)
+# define stxai_i(i0, r0, r1) _stxai_i(_jit, i0, r0, r1)
+static void _stxai_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+# if __X64 && !__X64_32
+# define stxar_l(r0, r1, r2) generic_stxar_l(r0, r1, r2)
+# define stxai_l(i0, r0, r1) _stxai_l(_jit, i0, r0, r1)
+static void _stxai_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+# endif
+/* Unaligned stores keep forwarding to the generic implementation. */
+# define unstr(r0, r1, i0) generic_unstr(r0, r1, i0)
+# define unsti(i0, r0, i1) generic_unsti(i0, r0, i1)
# define jcc(code, i0) _jcc(_jit, code, i0)
# define jo(i0) jcc(X86_CC_O, i0)
# define jno(i0) jcc(X86_CC_NO, i0)
@@ -3730,6 +3795,104 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
#endif
static void
+_ldxai_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /*
+     * Signed byte load with post-increment of the base register.
+     *   r0: destination register, r1: base register, i0: increment.
+     * Fast path: when the operands are exactly %rax, %rsi and 1, a single
+     * LODSB does the load and the increment.  This assumes DF = 0.
+     */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 1) {
+        /* lods %rsi, %al */
+        ic(0xac);
+        /* lods only writes %al; extend it into the full register */
+        extr_c(r0, r0);
+    }
+    else
+        /* Any other combination must use the signed generic emitter
+         * (the unsigned one would be the wrong load for this opcode). */
+        generic_ldxai_c(r0, r1, i0);
+}
+
+static void
+_ldxai_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Unsigned byte load with post-increment.  Only the %rax/%rsi/1
+     * combination has a one-byte encoding; the rest is generic. */
+    if (r0 != _RAX_REGNO || r1 != _RSI_REGNO || i0 != 1) {
+        generic_ldxai_uc(r0, r1, i0);
+        return;
+    }
+    /* lods %rsi, %al -- relies on DF = 0 */
+    ic(0xac);
+    extr_uc(r0, r0);
+}
+
+static void
+_ldxai_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /*
+     * Signed 16-bit load with post-increment of the base register.
+     *   r0: destination register, r1: base register, i0: increment.
+     * Fast path: when the operands are exactly %rax, %rsi and 2, an
+     * operand-size-prefixed LODS does the load and the increment.
+     * This assumes DF = 0.
+     */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 2) {
+        /* lods %rsi, %ax */
+        ic(0x66);
+        ic(0xad);
+        /* lods only writes %ax; extend it into the full register */
+        extr_s(r0, r0);
+    }
+    else
+        /* Any other combination must use the signed generic emitter
+         * (the unsigned one would be the wrong load for this opcode). */
+        generic_ldxai_s(r0, r1, i0);
+}
+
+static void
+_ldxai_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Unsigned 16-bit load with post-increment.  Everything except the
+     * %rax/%rsi/2 combination is handed to the generic emitter. */
+    if (r0 != _RAX_REGNO || r1 != _RSI_REGNO || i0 != 2) {
+        generic_ldxai_us(r0, r1, i0);
+        return;
+    }
+    /* lods %rsi, %ax (0x66 operand-size prefix) -- relies on DF = 0 */
+    ic(0x66);
+    ic(0xad);
+    extr_us(r0, r0);
+}
+
+static void
+_ldxai_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* 32-bit load with post-increment.  Everything except the
+     * %rax/%rsi/4 combination is handed to the generic emitter. */
+    if (r0 != _RAX_REGNO || r1 != _RSI_REGNO || i0 != 4) {
+        generic_ldxai_i(r0, r1, i0);
+        return;
+    }
+    /* lods %rsi, %eax -- relies on DF = 0 */
+    ic(0xad);
+# if __X64 && !__X64_32
+    /* 64-bit words only: run extr_i on the loaded value */
+    extr_i(r0, r0);
+# endif
+}
+
+# if __X64 && !__X64_32
+static void
+_ldxai_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Unsigned 32-bit load with post-increment (64-bit builds only).
+     * Everything except %rax/%rsi/4 is handed to the generic emitter. */
+    if (r0 != _RAX_REGNO || r1 != _RSI_REGNO || i0 != 4) {
+        generic_ldxai_ui(r0, r1, i0);
+        return;
+    }
+    /* lods %rsi, %eax -- relies on DF = 0 */
+    ic(0xad);
+    extr_ui(r0, r0);
+}
+
+
+static void
+_ldxai_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* 64-bit load with post-increment.  Everything except the
+     * %rax/%rsi/8 combination is handed to the generic emitter. */
+    if (r0 != _RAX_REGNO || r1 != _RSI_REGNO || i0 != 8) {
+        generic_ldxai_l(r0, r1, i0);
+        return;
+    }
+    /* rex.w + lods %rsi, %rax -- relies on DF = 0 */
+    ic(0x48);
+    ic(0xad);
+}
+# endif
+
+static void
_str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
@@ -4053,6 +4216,57 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
}
#endif
+static void
+_stxai_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Byte store with post-increment.  Everything except the
+     * %rdi/%rax/1 combination is handed to the generic emitter. */
+    if (r0 != _RDI_REGNO || r1 != _RAX_REGNO || i0 != 1) {
+        generic_stxai_c(i0, r0, r1);
+        return;
+    }
+    /* stos %al, %rdi -- relies on DF = 0 */
+    ic(0xaa);
+}
+
+static void
+_stxai_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* 16-bit store with post-increment.  Everything except the
+     * %rdi/%rax/2 combination is handed to the generic emitter. */
+    if (r0 != _RDI_REGNO || r1 != _RAX_REGNO || i0 != 2) {
+        generic_stxai_s(i0, r0, r1);
+        return;
+    }
+    /* stos %ax, %rdi (0x66 operand-size prefix) -- relies on DF = 0 */
+    ic(0x66);
+    ic(0xab);
+}
+
+static void
+_stxai_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* 32-bit store with post-increment.  Everything except the
+     * %rdi/%rax/4 combination is handed to the generic emitter. */
+    if (r0 != _RDI_REGNO || r1 != _RAX_REGNO || i0 != 4) {
+        generic_stxai_i(i0, r0, r1);
+        return;
+    }
+    /* stos %eax, %rdi -- relies on DF = 0 */
+    ic(0xab);
+}
+
+#if __X64 && !__X64_32
+static void
+_stxai_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* 64-bit store with post-increment.  Everything except the
+     * %rdi/%rax/8 combination is handed to the generic emitter. */
+    if (r0 != _RDI_REGNO || r1 != _RAX_REGNO || i0 != 8) {
+        generic_stxai_l(i0, r0, r1);
+        return;
+    }
+    /* rex.w + stos %rax, %rdi -- relies on DF = 0 */
+    ic(0x48);
+    ic(0xab);
+}
+#endif
+
static jit_word_t
_jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
{
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index 69cb86d..58bf9ae 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -1602,23 +1602,21 @@ _emit_code(jit_state_t *_jit)
break
#define case_rrx(name, type) /* immediate form: u and v go through rn(), w is passed as-is */ \
case jit_code_##name##i##type: \
- generic_##name##i##type(rn(node->u.w), \
- rn(node->v.w), node->w.w); \
+ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
break
#define case_rrX(name, type) /* register form: u, v and w all go through rn() */ \
case jit_code_##name##r##type: \
- generic_##name##r##type(rn(node->u.w), \
- rn(node->v.w), rn(node->w.w)); \
+ name##r##type(rn(node->u.w), \
+ rn(node->v.w), rn(node->w.w)); \
break
#define case_xrr(name, type) /* immediate form: u is passed as-is, v and w go through rn() */ \
case jit_code_##name##i##type: \
- generic_##name##i##type(node->u.w, rn(node->v.w), \
- rn(node->w.w)); \
+ name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
break
#define case_Xrr(name, type) /* register form: u, v and w all go through rn() */ \
case jit_code_##name##r##type: \
- generic_##name##r##type(rn(node->u.w), rn(node->v.w), \
- rn(node->w.w)); \
+ name##r##type(rn(node->u.w), rn(node->v.w), \
+ rn(node->w.w)); \
break
#define case_rrrw(name, type) \
case jit_code_##name##i##type: \
generated by cgit v1.2.3 (git 2.25.1) at 2025年09月16日 06:25:24 +0000

AltStyle によって変換されたページ (->オリジナル) /