Add new fused multiply add/sub instructions - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
authorpcpa <paulo.cesar.pereira.de.andrade@gmail.com>2023年04月18日 17:35:56 -0300
committerpcpa <paulo.cesar.pereira.de.andrade@gmail.com>2023年04月18日 17:35:56 -0300
commitc1e52afb6307e6eb74ded0c910232e895d07ddcd (patch)
treed9292666cc74b0a0b1772f661e6989590c17d1c6
parent7b21297cb9c2ce5b8ed8f3364d77d4eafe38773d (diff)
downloadlightning-c1e52afb6307e6eb74ded0c910232e895d07ddcd.tar.gz
Add new fused multiply add/sub instructions
Probably, and for completeness, fnma* and fnms* instructions should also be added, for the 'negated' versions. That would map better to what is provided by most supported CPUs, and would be a way to make better use of these features. * include/lightning.h.in: Define new fmar_f, fmai_f, fmsr_f, fmsi_f, fmar_d, fmai_d, fmsr_d and fmsi_d instructions, that add support for fused multiply add/sub, in the format r0 = r1 * r2 +/- r3. * include/lightning/jit_private.h: Add helper macros for debug output. * lib/jit_names.c: Add strings for debug output. * lib/jit_print.c: Print debug output for the new instructions. * lib/lightning.c: Add logic for the new register pair in the 'v' (second) field of jit_node_t. The new pattern is required to allow having a 'double' immediate in the last argument, for the versions with immediates. The versions with immediates are added for consistency, as they should be very rarely used in common usage of fused multiply add/sub.
Diffstat
-rw-r--r--ChangeLog 17
-rw-r--r--include/lightning.h.in 45
-rw-r--r--include/lightning/jit_private.h 19
-rw-r--r--lib/jit_names.c 4
-rw-r--r--lib/jit_print.c 35
-rw-r--r--lib/lightning.c 334
6 files changed, 393 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index b13b56e..5013515 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2023年04月18日 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h.in: Define new fmar_f, fmai_f, fmsr_f,
+ fmsi_f, fmar_d, fmai_d, fmsr_d and fmsi_d instructions, that
+ add support for fused multiply add/sub, in the format
+ r0 = r1 * r2 +/- r3.
+ * include/lightning/jit_private.h: Add helper macros for debug
+ output.
+ * lib/jit_names.c: Add strings for debug output.
+ * lib/jit_print.c: Print debug output for the new instructions.
+ * lib/lightning.c: Add logic for the new register pair in the
+ 'v' (second) field of jit_node_t. The new pattern is required
+ to allow having a 'double' immediate in the last argument, for
+ the versions with immediates. The versions with immediates are
+ added for consistency, as they should be very rarely used in
+ common usage of fused multiply add/sub.
+
2023年04月06日 Paulo Andrade <pcpa@gnu.org>
* include/lightning.h.in: Define new movi_w_f, movi_w_d and
diff --git a/include/lightning.h.in b/include/lightning.h.in
index 53c7f1f..41b96e2 100644
--- a/include/lightning.h.in
+++ b/include/lightning.h.in
@@ -1184,6 +1184,19 @@ typedef enum {
jit_code_movi_w_f,
jit_code_movi_w_d, jit_code_movi_ww_d,
+#define jit_fmar_f(u,v,w,x) jit_new_node_wqw(jit_code_fmar_f, u, v, w, x)
+#define jit_fmai_f(u,v,w,x) _jit_fmai_f(_jit, u, v, w, x)
+ jit_code_fmar_f, jit_code_fmai_f,
+#define jit_fmsr_f(u,v,w,x) jit_new_node_wqw(jit_code_fmsr_f, u, v, w, x)
+#define jit_fmsi_f(u,v,w,x) _jit_fmsi_f(_jit, u, v, w, x)
+ jit_code_fmsr_f, jit_code_fmsi_f,
+#define jit_fmar_d(u,v,w,x) jit_new_node_wqw(jit_code_fmar_d, u, v, w, x)
+#define jit_fmai_d(u,v,w,x) _jit_fmai_d(_jit, u, v, w, x)
+ jit_code_fmar_d, jit_code_fmai_d,
+#define jit_fmsr_d(u,v,w,x) jit_new_node_wqw(jit_code_fmsr_d, u, v, w, x)
+#define jit_fmsi_d(u,v,w,x) _jit_fmsi_d(_jit, u, v, w, x)
+ jit_code_fmsr_d, jit_code_fmsi_d,
+
jit_code_last_code
} jit_code_t;
@@ -1304,9 +1317,13 @@ extern void _jit_pushargi_f(jit_state_t*, jit_float32_t);
extern void _jit_retr_f(jit_state_t*, jit_fpr_t);
extern void _jit_reti_f(jit_state_t*, jit_float32_t);
extern void _jit_retval_f(jit_state_t*, jit_fpr_t);
-extern void _jit_negi_f(jit_state_t*, jit_int32_t, jit_float32_t);
-extern void _jit_absi_f(jit_state_t*, jit_int32_t, jit_float32_t);
-extern void _jit_sqrti_f(jit_state_t*, jit_int32_t, jit_float32_t);
+extern void _jit_negi_f(jit_state_t*, jit_fpr_t, jit_float32_t);
+extern void _jit_absi_f(jit_state_t*, jit_fpr_t, jit_float32_t);
+extern void _jit_sqrti_f(jit_state_t*, jit_fpr_t, jit_float32_t);
+extern void _jit_fmai_f(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
+extern void _jit_fmsi_f(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
extern jit_node_t *_jit_arg_d(jit_state_t*);
extern void _jit_getarg_d(jit_state_t*, jit_fpr_t, jit_node_t*);
@@ -1317,9 +1334,13 @@ extern void _jit_pushargi_d(jit_state_t*, jit_float64_t);
extern void _jit_retr_d(jit_state_t*, jit_fpr_t);
extern void _jit_reti_d(jit_state_t*, jit_float64_t);
extern void _jit_retval_d(jit_state_t*, jit_fpr_t);
-extern void _jit_negi_d(jit_state_t*, jit_int32_t, jit_float64_t);
-extern void _jit_absi_d(jit_state_t*, jit_int32_t, jit_float64_t);
-extern void _jit_sqrti_d(jit_state_t*, jit_int32_t, jit_float64_t);
+extern void _jit_negi_d(jit_state_t*, jit_fpr_t, jit_float64_t);
+extern void _jit_absi_d(jit_state_t*, jit_fpr_t, jit_float64_t);
+extern void _jit_sqrti_d(jit_state_t*, jit_fpr_t, jit_float64_t);
+extern void _jit_fmai_d(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
+extern void _jit_fmsi_d(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
#define jit_get_reg(s) _jit_get_reg(_jit,s)
extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t);
@@ -1369,6 +1390,10 @@ extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t,
extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t,
jit_int32_t, jit_int32_t,
jit_word_t, jit_word_t);
+#define jit_new_node_wqw(c,u,l,h,w) _jit_new_node_wqw(_jit,c,u,l,h,w)
+extern jit_node_t *_jit_new_node_wqw(jit_state_t*, jit_code_t,
+ jit_word_t, jit_int32_t,
+ jit_int32_t, jit_word_t);
#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h)
extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t,
jit_word_t, jit_word_t,
@@ -1376,9 +1401,17 @@ extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t,
#define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w)
extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t,
jit_word_t, jit_word_t, jit_float32_t);
+#define jit_new_node_wqf(c,u,l,h,w) _jit_new_node_wqf(_jit,c,u,l,h,w)
+extern jit_node_t *_jit_new_node_wqf(jit_state_t*, jit_code_t,
+ jit_word_t, jit_int32_t,
+ jit_int32_t, jit_float32_t);
#define jit_new_node_wwd(c,u,v,w) _jit_new_node_wwd(_jit,c,u,v,w)
extern jit_node_t *_jit_new_node_wwd(jit_state_t*, jit_code_t,
jit_word_t, jit_word_t, jit_float64_t);
+#define jit_new_node_wqd(c,u,l,h,w) _jit_new_node_wqd(_jit,c,u,l,h,w)
+extern jit_node_t *_jit_new_node_wqd(jit_state_t*, jit_code_t,
+ jit_word_t, jit_int32_t,
+ jit_int32_t, jit_float64_t);
#define jit_new_node_pww(c,u,v,w) _jit_new_node_pww(_jit,c,u,v,w)
extern jit_node_t *_jit_new_node_pww(jit_state_t*, jit_code_t,
jit_pointer_t, jit_word_t, jit_word_t);
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index a55a20f..dde41ba 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -257,8 +257,18 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
(void)jit_new_node_wf(code, u, v); \
jit_synth_inc(); \
} while (0)
+/* Create a jit_code_<name> node with jit_new_node_wqf() (u, the (v, w)
+ * pair and the float32 value x), discard the returned node, and then call
+ * jit_synth_inc(). */
+#define jit_inc_synth_wqf(name, u, v, w, x) \
+ do { \
+ (void)jit_new_node_wqf(jit_code_##name, u, v, w, x); \
+ jit_synth_inc(); \
+ } while (0)
#define jit_inc_synth_wd(name, u, v) \
jit_code_inc_synth_wd(jit_code_##name, u, v)
+/* Create a jit_code_<name> node with jit_new_node_wqd() (u, the (v, w)
+ * pair and the float64 value x), discard the returned node, and then call
+ * jit_synth_inc(). */
+#define jit_inc_synth_wqd(name, u, v, w, x) \
+ do { \
+ (void)jit_new_node_wqd(jit_code_##name, u, v, w, x); \
+ jit_synth_inc(); \
+ } while (0)
#define jit_code_inc_synth_wd(code, u, v) \
do { \
(void)jit_new_node_wd(code, u, v); \
@@ -340,10 +350,11 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
#define jit_cc_a0_cnd 0x00000100 /* arg1 is a conditionally set register */
#define jit_cc_a1_reg 0x00000200 /* arg1 is a register */
#define jit_cc_a1_chg 0x00000400 /* arg1 is modified */
-#define jit_cc_a1_int 0x00001000 /* arg1 is immediate word */
-#define jit_cc_a1_flt 0x00002000 /* arg1 is immediate float */
-#define jit_cc_a1_dbl 0x00004000 /* arg1 is immediate double */
-#define jit_cc_a1_arg 0x00008000 /* arg1 is an argument node */
+#define jit_cc_a1_int 0x00000800 /* arg1 is immediate word */
+#define jit_cc_a1_flt 0x00001000 /* arg1 is immediate float */
+#define jit_cc_a1_dbl 0x00002000 /* arg1 is immediate double */
+#define jit_cc_a1_arg 0x00004000 /* arg1 is an argument node */
+#define jit_cc_a1_rlh 0x00008000 /* arg1 is a register pair */
#define jit_cc_a2_reg 0x00010000 /* arg2 is a register */
#define jit_cc_a2_chg 0x00020000 /* arg2 is modified */
#define jit_cc_a2_int 0x00100000 /* arg2 is immediate word */
diff --git a/lib/jit_names.c b/lib/jit_names.c
index 83ecb44..a6bb023 100644
--- a/lib/jit_names.c
+++ b/lib/jit_names.c
@@ -287,4 +287,8 @@ static char *code_name[] = {
"unstr_x", "unsti_x",
"movi_w_f",
"movi_w_d", "movi_ww_d",
+ "fmar_f", "fmai_f",
+ "fmsr_f", "fmsi_f",
+ "fmar_d", "fmai_d",
+ "fmsr_d", "fmsi_d",
};
diff --git a/lib/jit_print.c b/lib/jit_print.c
index 3113255..bd920c2 100644
--- a/lib/jit_print.c
+++ b/lib/jit_print.c
@@ -126,7 +126,8 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
value = jit_classify(node->code) &
(jit_cc_a0_int|jit_cc_a0_flt|jit_cc_a0_dbl|jit_cc_a0_jmp|
jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_arg|
- jit_cc_a1_reg|jit_cc_a1_int|jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg|
+ jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a1_int|
+ jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg|
jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl|jit_cc_a2_rlh);
if (!(node->flag & jit_flag_synth) && ((value & jit_cc_a0_jmp) ||
node->code == jit_code_finishr ||
@@ -273,6 +274,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
print_str(" ("); print_int(node->w.q.l);
print_chr(' '); print_int(node->w.q.h);
print_str(") "); return;
+ r_q_r:
+ print_chr(' '); print_reg(node->u.w);
+ print_str(" ("); print_reg(node->v.q.l);
+ print_chr(' '); print_reg(node->v.q.h);
+ print_str(") "); print_reg(node->w.w);
+ return;
r_r_f:
print_chr(' '); print_reg(node->u.w);
print_chr(' '); print_reg(node->v.w);
@@ -282,6 +289,16 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
else
print_flt(node->w.f);
return;
+ r_q_f:
+ print_chr(' '); print_reg(node->u.w);
+ print_str(" ("); print_reg(node->v.q.l);
+ print_chr(' '); print_reg(node->v.q.h);
+ print_str(") ");
+ if (node->flag & jit_flag_data)
+ print_flt(*(jit_float32_t *)node->w.n->u.w);
+ else
+ print_flt(node->w.f);
+ return;
r_r_d:
print_chr(' '); print_reg(node->u.w);
print_chr(' '); print_reg(node->v.w);
@@ -291,6 +308,16 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
else
print_flt(node->w.d);
return;
+ r_q_d:
+ print_chr(' '); print_reg(node->u.w);
+ print_str(" ("); print_reg(node->v.q.l);
+ print_chr(' '); print_reg(node->v.q.h);
+ print_str(") ");
+ if (node->flag & jit_flag_data)
+ print_flt(*(jit_float64_t *)node->w.n->u.w);
+ else
+ print_flt(node->w.d);
+ return;
w_r_r:
print_chr(' '); print_hex(node->u.w);
print_chr(' '); print_reg(node->v.w);
@@ -441,6 +468,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
goto n_r_f;
case jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_dbl:
goto n_r_d;
+ case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_reg:
+ goto r_q_r;
+ case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_flt:
+ goto r_q_f;
+ case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl:
+ goto r_q_d;
default:
abort();
}
diff --git a/lib/lightning.c b/lib/lightning.c
index 380c54d..19163ba 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -1172,6 +1172,21 @@ _jit_new_node_qww(jit_state_t *_jit, jit_code_t code,
}
+/*
+ * Allocate a node for `code' whose first field holds the word `u', whose
+ * second field holds the (l, h) pair and whose third field holds the word
+ * `w', then return the result of link_node() on it.
+ * Asserts that _jitc->realize is not set and that l and h differ.
+ * NOTE(review): the l != h assertion also rejects using the same register
+ * for both members of the pair (e.g. squaring in a fused multiply-add);
+ * confirm this restriction is intended.
+ */
jit_node_t *
+_jit_new_node_wqw(jit_state_t *_jit, jit_code_t code,
+                  jit_word_t u, jit_int32_t l,
+                  jit_int32_t h, jit_word_t w)
+{
+    jit_node_t *result;
+
+    result = new_node(code);
+    assert(!_jitc->realize);
+    assert(l != h);
+    /* Third field: plain word */
+    result->w.w = w;
+    /* Second field: low/high pair */
+    result->v.q.h = h;
+    result->v.q.l = l;
+    /* First field: plain word */
+    result->u.w = u;
+    return (link_node(result));
+}
+
+jit_node_t *
_jit_new_node_wwq(jit_state_t *_jit, jit_code_t code,
jit_word_t u, jit_word_t v,
jit_int32_t l, jit_int32_t h)
@@ -1198,6 +1213,21 @@ _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code,
}
+/*
+ * Allocate a node for `code' whose first field holds the word `u', whose
+ * second field holds the (l, h) pair and whose third field holds the
+ * float32 value `w', then return the result of link_node() on it.
+ * Asserts that _jitc->realize is not set and that l and h differ.
+ */
jit_node_t *
+_jit_new_node_wqf(jit_state_t *_jit, jit_code_t code,
+                  jit_word_t u, jit_int32_t l,
+                  jit_int32_t h, jit_float32_t w)
+{
+    jit_node_t *result;
+
+    result = new_node(code);
+    assert(!_jitc->realize);
+    assert(l != h);
+    /* Third field: float32 value */
+    result->w.f = w;
+    /* Second field: low/high pair */
+    result->v.q.h = h;
+    result->v.q.l = l;
+    /* First field: plain word */
+    result->u.w = u;
+    return (link_node(result));
+}
+
+jit_node_t *
_jit_new_node_wwd(jit_state_t *_jit, jit_code_t code,
jit_word_t u, jit_word_t v, jit_float64_t w)
{
@@ -1210,6 +1240,21 @@ _jit_new_node_wwd(jit_state_t *_jit, jit_code_t code,
}
+/*
+ * Allocate a node for `code' whose first field holds the word `u', whose
+ * second field holds the (l, h) pair and whose third field holds the
+ * float64 value `w', then return the result of link_node() on it.
+ * Asserts that _jitc->realize is not set and that l and h differ.
+ */
jit_node_t *
+_jit_new_node_wqd(jit_state_t *_jit, jit_code_t code,
+                  jit_word_t u, jit_int32_t l,
+                  jit_int32_t h, jit_float64_t w)
+{
+    jit_node_t *result;
+
+    result = new_node(code);
+    assert(!_jitc->realize);
+    assert(l != h);
+    /* Third field: float64 value */
+    result->w.d = w;
+    /* Second field: low/high pair */
+    result->v.q.h = h;
+    result->v.q.l = l;
+    /* First field: plain word */
+    result->u.w = u;
+    return (link_node(result));
+}
+
+jit_node_t *
_jit_new_node_pww(jit_state_t *_jit, jit_code_t code,
jit_pointer_t u, jit_word_t v, jit_word_t w)
{
@@ -1667,6 +1712,19 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
case jit_code_exti: case jit_code_exti_u: case jit_code_depi:
mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|jit_cc_a2_rlh;
break;
+ case jit_code_fmar_f: case jit_code_fmar_d:
+ case jit_code_fmsr_f: case jit_code_fmsr_d:
+ mask = jit_cc_a0_reg|jit_cc_a0_chg|
+ jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_reg;
+ break;
+ case jit_code_fmai_f: case jit_code_fmsi_f:
+ mask = jit_cc_a0_reg|jit_cc_a0_chg|
+ jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_flt;
+ break;
+ case jit_code_fmai_d: case jit_code_fmsi_d:
+ mask = jit_cc_a0_reg|jit_cc_a0_chg|
+ jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl;
+ break;
default:
abort();
}
@@ -2097,13 +2155,22 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node)
}
}
}
- if ((value & jit_cc_a1_reg) && !(node->v.w & jit_regno_patch)) {
- if (value & jit_cc_a1_chg) {
- jit_regset_clrbit(&_jitc->reglive, node->v.w);
- jit_regset_setbit(&_jitc->regmask, node->v.w);
+ if (value & jit_cc_a1_reg) {
+ if (value & jit_cc_a1_rlh) {
+ /* Assume registers are not changed */
+ if (!(node->v.q.l & jit_regno_patch))
+ jit_regset_setbit(&_jitc->reglive, node->v.q.l);
+ if (!(node->v.q.h & jit_regno_patch))
+ jit_regset_setbit(&_jitc->reglive, node->v.q.h);
+ }
+ else if (!(node->v.w & jit_regno_patch)) {
+ if (value & jit_cc_a1_chg) {
+ jit_regset_clrbit(&_jitc->reglive, node->v.w);
+ jit_regset_setbit(&_jitc->regmask, node->v.w);
+ }
+ else
+ jit_regset_setbit(&_jitc->reglive, node->v.w);
}
- else
- jit_regset_setbit(&_jitc->reglive, node->v.w);
}
if (value & jit_cc_a2_reg) {
if (value & jit_cc_a2_rlh) {
@@ -2151,8 +2218,14 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
else
jit_regset_setbit(&_jitc->regarg, jit_regno(node->u.w));
}
- if (value & jit_cc_a1_reg)
- jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w));
+ if (value & jit_cc_a1_reg) {
+ if (value & jit_cc_a1_rlh) {
+ jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.q.l));
+ jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.q.h));
+ }
+ else
+ jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w));
+ }
if (value & jit_cc_a2_reg) {
if (value & jit_cc_a2_rlh) {
jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.l));
@@ -2190,8 +2263,14 @@ _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
else
jit_regset_clrbit(&_jitc->regarg, jit_regno(node->u.w));
}
- if (value & jit_cc_a1_reg)
- jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w));
+ if (value & jit_cc_a1_reg) {
+ if (value & jit_cc_a1_rlh) {
+ jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.q.l));
+ jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.q.h));
+ }
+ else
+ jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w));
+ }
if (value & jit_cc_a2_reg) {
if (value & jit_cc_a2_rlh) {
jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l));
@@ -2576,21 +2655,51 @@ _jit_setup(jit_state_t *_jit, jit_block_t *block)
/* Check argument registers in reverse order to properly
* handle registers that are both, argument and result */
value = jit_classify(node->code);
- if ((value & jit_cc_a2_reg) &&
- !(node->w.w & jit_regno_patch) &&
- jit_regset_tstbit(&block->regmask, node->w.w)) {
+ if (value & jit_cc_a2_reg) {
live = !(value & jit_cc_a2_chg);
- jit_regset_clrbit(&block->regmask, node->w.w);
- if (live)
- jit_regset_setbit(&block->reglive, node->w.w);
+ if (value & jit_cc_a2_rlh) {
+ /* Assume a register pair in the third argument (arg2) is not modified */
+ assert(live);
+ if (!(node->w.q.l & jit_regno_patch) &&
+ jit_regset_tstbit(&block->regmask, node->w.q.l)) {
+ jit_regset_clrbit(&block->regmask, node->w.q.l);
+ }
+ if (!(node->w.q.h & jit_regno_patch) &&
+ jit_regset_tstbit(&block->regmask, node->w.q.h)) {
+ jit_regset_clrbit(&block->regmask, node->w.q.h);
+ }
+ }
+ else {
+ if (!(node->w.w & jit_regno_patch) &&
+ jit_regset_tstbit(&block->regmask, node->w.w)) {
+ jit_regset_clrbit(&block->regmask, node->w.w);
+ if (live)
+ jit_regset_setbit(&block->reglive, node->w.w);
+ }
+ }
}
- if ((value & jit_cc_a1_reg) &&
- !(node->v.w & jit_regno_patch) &&
- jit_regset_tstbit(&block->regmask, node->v.w)) {
+ if (value & jit_cc_a1_reg) {
live = !(value & jit_cc_a1_chg);
- jit_regset_clrbit(&block->regmask, node->v.w);
- if (live)
- jit_regset_setbit(&block->reglive, node->v.w);
+ if (value & jit_cc_a1_rlh) {
+ /* Assume will not modify a pair in second argument */
+ assert(live);
+ if (!(node->v.q.l & jit_regno_patch) &&
+ jit_regset_tstbit(&block->regmask, node->v.q.l)) {
+ jit_regset_clrbit(&block->regmask, node->v.q.l);
+ }
+ if (!(node->v.q.h & jit_regno_patch) &&
+ jit_regset_tstbit(&block->regmask, node->v.q.h)) {
+ jit_regset_clrbit(&block->regmask, node->v.q.h);
+ }
+ }
+ else {
+ if (!(node->v.w & jit_regno_patch) &&
+ jit_regset_tstbit(&block->regmask, node->v.w)) {
+ jit_regset_clrbit(&block->regmask, node->v.w);
+ if (live)
+ jit_regset_setbit(&block->reglive, node->v.w);
+ }
+ }
}
if (value & jit_cc_a0_reg) {
live = !(value & jit_cc_a0_chg);
@@ -2695,23 +2804,34 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block)
}
}
else {
- if (value & jit_cc_a2_reg) {
- if (!(node->w.w & jit_regno_patch)) {
- if (jit_regset_tstbit(&regmask, node->w.w)) {
- jit_regset_clrbit(&regmask, node->w.w);
- if (!(value & jit_cc_a2_chg))
- jit_regset_setbit(&reglive, node->w.w);
- }
+ if (!(node->w.w & jit_regno_patch)) {
+ if (jit_regset_tstbit(&regmask, node->w.w)) {
+ jit_regset_clrbit(&regmask, node->w.w);
+ if (!(value & jit_cc_a2_chg))
+ jit_regset_setbit(&reglive, node->w.w);
}
}
}
}
if (value & jit_cc_a1_reg) {
- if (!(node->v.w & jit_regno_patch)) {
- if (jit_regset_tstbit(&regmask, node->v.w)) {
- jit_regset_clrbit(&regmask, node->v.w);
- if (!(value & jit_cc_a1_chg))
- jit_regset_setbit(&reglive, node->v.w);
+ if (value & jit_cc_a1_rlh) {
+ if (!(node->v.q.l & jit_regno_patch)) {
+ /* Assume register is not changed */
+ if (jit_regset_tstbit(&regmask, node->v.q.l))
+ jit_regset_clrbit(&regmask, node->v.q.l);
+ }
+ if (!(node->v.q.h & jit_regno_patch)) {
+ if (jit_regset_tstbit(&regmask, node->v.q.h))
+ jit_regset_clrbit(&regmask, node->v.q.h);
+ }
+ }
+ else {
+ if (!(node->v.w & jit_regno_patch)) {
+ if (jit_regset_tstbit(&regmask, node->v.w)) {
+ jit_regset_clrbit(&regmask, node->v.w);
+ if (!(value & jit_cc_a1_chg))
+ jit_regset_setbit(&reglive, node->v.w);
+ }
}
}
}
@@ -2871,11 +2991,24 @@ _jit_update(jit_state_t *_jit, jit_node_t *node,
}
}
if (value & jit_cc_a1_reg) {
- if (!(node->v.w & jit_regno_patch)) {
- if (jit_regset_tstbit(mask, node->v.w)) {
- jit_regset_clrbit(mask, node->v.w);
- if (!(value & jit_cc_a1_chg))
- jit_regset_setbit(live, node->v.w);
+ if (value & jit_cc_a1_rlh) {
+ if (!(node->v.q.l & jit_regno_patch)) {
+ /* Assume register is not changed */
+ if (jit_regset_tstbit(mask, node->v.q.l))
+ jit_regset_clrbit(mask, node->v.q.l);
+ }
+ if (!(node->v.q.h & jit_regno_patch)) {
+ if (jit_regset_tstbit(mask, node->v.q.h))
+ jit_regset_clrbit(mask, node->v.q.h);
+ }
+ }
+ else {
+ if (!(node->v.w & jit_regno_patch)) {
+ if (jit_regset_tstbit(mask, node->v.w)) {
+ jit_regset_clrbit(mask, node->v.w);
+ if (!(value & jit_cc_a1_chg))
+ jit_regset_setbit(live, node->v.w);
+ }
}
}
}
@@ -3756,9 +3889,24 @@ _simplify(jit_state_t *_jit)
}
}
if (info & jit_cc_a1_chg) {
- regno = jit_regno(node->v.w);
- _jitc->values[regno].kind = 0;
- ++_jitc->gen[regno];
+#if 0
+ /* Assume registers are not changed */
+ if (info & jit_cc_a1_rlh) {
+ regno = jit_regno(node->v.q.l);
+ _jitc->values[regno].kind = 0;
+ ++_jitc->gen[regno];
+ regno = jit_regno(node->v.q.h);
+ _jitc->values[regno].kind = 0;
+ ++_jitc->gen[regno];
+ }
+ else {
+#endif
+ regno = jit_regno(node->v.w);
+ _jitc->values[regno].kind = 0;
+ ++_jitc->gen[regno];
+#if 0
+ }
+#endif
}
if (info & jit_cc_a2_chg) {
#if 0
@@ -3982,8 +4130,18 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
node->u.w = patch;
}
}
- if ((value & jit_cc_a1_reg) && node->v.w == regno)
- node->v.w = patch;
+ if (value & jit_cc_a1_reg) {
+ if (value & jit_cc_a1_rlh) {
+ if (node->v.q.l == regno)
+ node->v.q.l = patch;
+ if (node->v.q.h == regno)
+ node->v.q.h = patch;
+ }
+ else {
+ if (node->v.w == regno)
+ node->v.w = patch;
+ }
+ }
if (value & jit_cc_a2_reg) {
if (value & jit_cc_a2_rlh) {
if (node->w.q.l == regno)
@@ -4384,7 +4542,7 @@ _movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
#endif
void
-_jit_negi_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v)
+_jit_negi_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v)
{
jit_inc_synth_wf(negi_f, u, v);
jit_movi_f(u, v);
@@ -4393,7 +4551,7 @@ _jit_negi_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v)
}
void
-_jit_absi_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v)
+_jit_absi_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v)
{
jit_inc_synth_wf(absi_f, u, v);
jit_movi_f(u, v);
@@ -4402,7 +4560,7 @@ _jit_absi_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v)
}
void
-_jit_sqrti_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v)
+_jit_sqrti_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v)
{
jit_inc_synth_wf(sqrti_f, u, v);
jit_movi_f(u, v);
@@ -4411,7 +4569,45 @@ _jit_sqrti_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v)
}
void
-_jit_negi_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v)
+_jit_fmai_f(jit_state_t *_jit,
+ jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x)
+{
+ jit_int32_t y;
+ jit_inc_synth_wqf(fmai_f, u, v, w, x);
+ if (u != v && u != w) {
+ jit_movi_f(u, x);
+ jit_fmar_f(u, v, w, u);
+ }
+ else {
+ y = jit_get_reg(jit_class_fpr);
+ jit_movi_f(y, x);
+ jit_fmar_f(u, v, w, y);
+ jit_unget_reg(y);
+ }
+ jit_dec_synth();
+}
+
+/*
+ * fmsi_f: u = v * w - x, where x is a float32 immediate.
+ * The sequence is recorded as a synthesized fmsi_f node and expanded to a
+ * movi_f of the immediate followed by fmsr_f.
+ */
+void
+_jit_fmsi_f(jit_state_t *_jit,
+            jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x)
+{
+    jit_int32_t y;
+    /* The synthesized node must carry jit_code_fmsi_f (not fmai_f), so
+     * that the instruction is identified as the subtract variant */
+    jit_inc_synth_wqf(fmsi_f, u, v, w, x);
+    if (u != v && u != w) {
+        /* u is distinct from both multiplicands; it can hold x itself */
+        jit_movi_f(u, x);
+        jit_fmsr_f(u, v, w, u);
+    }
+    else {
+        /* u is also a multiplicand, so loading x into it would overwrite
+         * an input; hold the immediate in a register from jit_get_reg() */
+        y = jit_get_reg(jit_class_fpr);
+        jit_movi_f(y, x);
+        jit_fmsr_f(u, v, w, y);
+        jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_negi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v)
{
jit_inc_synth_wd(negi_d, u, v);
jit_movi_d(u, v);
@@ -4420,7 +4616,7 @@ _jit_negi_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v)
}
void
-_jit_absi_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v)
+_jit_absi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v)
{
jit_inc_synth_wd(absi_d, u, v);
jit_movi_d(u, v);
@@ -4429,7 +4625,7 @@ _jit_absi_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v)
}
void
-_jit_sqrti_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v)
+_jit_sqrti_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v)
{
jit_inc_synth_wd(sqrti_d, u, v);
jit_movi_d(u, v);
@@ -4437,6 +4633,44 @@ _jit_sqrti_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v)
jit_dec_synth();
}
+/*
+ * fmai_d: u = v * w + x, where x is a float64 immediate.
+ * The sequence is recorded as a synthesized fmai_d node and expanded to a
+ * movi_d of the immediate followed by fmar_d.
+ */
+void
+_jit_fmai_d(jit_state_t *_jit,
+            jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x)
+{
+    jit_inc_synth_wqd(fmai_d, u, v, w, x);
+    if (u == v || u == w) {
+        /* u is also a multiplicand, so loading x into it would overwrite
+         * an input; hold the immediate in a register from jit_get_reg() */
+        jit_int32_t scratch = jit_get_reg(jit_class_fpr);
+        jit_movi_d(scratch, x);
+        jit_fmar_d(u, v, w, scratch);
+        jit_unget_reg(scratch);
+    }
+    else {
+        /* u is distinct from both multiplicands; it can hold x itself */
+        jit_movi_d(u, x);
+        jit_fmar_d(u, v, w, u);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * fmsi_d: u = v * w - x, where x is a float64 immediate.
+ * The sequence is recorded as a synthesized fmsi_d node and expanded to a
+ * movi_d of the immediate followed by fmsr_d.
+ */
+void
+_jit_fmsi_d(jit_state_t *_jit,
+            jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x)
+{
+    jit_int32_t y;
+    /* The synthesized node must carry jit_code_fmsi_d (not fmai_d), so
+     * that the instruction is identified as the subtract variant */
+    jit_inc_synth_wqd(fmsi_d, u, v, w, x);
+    if (u != v && u != w) {
+        /* u is distinct from both multiplicands; it can hold x itself */
+        jit_movi_d(u, x);
+        jit_fmsr_d(u, v, w, u);
+    }
+    else {
+        /* u is also a multiplicand, so loading x into it would overwrite
+         * an input; hold the immediate in a register from jit_get_reg() */
+        y = jit_get_reg(jit_class_fpr);
+        jit_movi_d(y, x);
+        jit_fmsr_d(u, v, w, y);
+        jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
static void
_cloi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
generated by cgit v1.2.3 (git 2.39.1) at 2025年10月07日 00:26:42 +0000

AltStyle によって変換されたページ (->オリジナル) /