author | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023年04月18日 17:35:56 -0300 |
---|---|---|
committer | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2023年04月18日 17:35:56 -0300 |
commit | c1e52afb6307e6eb74ded0c910232e895d07ddcd (patch) | |
tree | d9292666cc74b0a0b1772f661e6989590c17d1c6 | |
parent | 7b21297cb9c2ce5b8ed8f3364d77d4eafe38773d (diff) | |
download | lightning-c1e52afb6307e6eb74ded0c910232e895d07ddcd.tar.gz |
-rw-r--r-- | ChangeLog | 17 | ||||
-rw-r--r-- | include/lightning.h.in | 45 | ||||
-rw-r--r-- | include/lightning/jit_private.h | 19 | ||||
-rw-r--r-- | lib/jit_names.c | 4 | ||||
-rw-r--r-- | lib/jit_print.c | 35 | ||||
-rw-r--r-- | lib/lightning.c | 334 |
@@ -1,3 +1,20 @@ +2023年04月18日 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h.in: Define new fmar_f, fmai_f, fmsr_f, + fmsi_f, fmar_d, fmai_d, fmsr_d and fmsi_d instructions, that + add support for fused multiply add/sub, in the format + r0 = r1 * r2 +/- r3. + * include/lightning/jit_private.h: Add helper macros for debug + output. + * lib/jit_names.c: Add strings for debug output. + * lib/jit_print.c: Print debug output for the new instructions. + * lib/lightning.c: Add logic for the new register pair in the + 'v' (second) field of jit_node_t. The new pattern is required + to allow having a 'double' immediate in the last argument, for + the versions with immediates. The versions with immediates are + added for consistency, as they should be very rarely used in + common usage of fused multiply add/sub. + 2023年04月06日 Paulo Andrade <pcpa@gnu.org> * include/lightning.h.in: Define new movi_w_f, movi_w_d and diff --git a/include/lightning.h.in b/include/lightning.h.in index 53c7f1f..41b96e2 100644 --- a/include/lightning.h.in +++ b/include/lightning.h.in @@ -1184,6 +1184,19 @@ typedef enum { jit_code_movi_w_f, jit_code_movi_w_d, jit_code_movi_ww_d, +#define jit_fmar_f(u,v,w,x) jit_new_node_wqw(jit_code_fmar_f, u, v, w, x) +#define jit_fmai_f(u,v,w,x) _jit_fmai_f(_jit, u, v, w, x) + jit_code_fmar_f, jit_code_fmai_f, +#define jit_fmsr_f(u,v,w,x) jit_new_node_wqw(jit_code_fmsr_f, u, v, w, x) +#define jit_fmsi_f(u,v,w,x) _jit_fmsi_f(_jit, u, v, w, x) + jit_code_fmsr_f, jit_code_fmsi_f, +#define jit_fmar_d(u,v,w,x) jit_new_node_wqw(jit_code_fmar_d, u, v, w, x) +#define jit_fmai_d(u,v,w,x) _jit_fmai_d(_jit, u, v, w, x) + jit_code_fmar_d, jit_code_fmai_d, +#define jit_fmsr_d(u,v,w,x) jit_new_node_wqw(jit_code_fmsr_d, u, v, w, x) +#define jit_fmsi_d(u,v,w,x) _jit_fmsi_d(_jit, u, v, w, x) + jit_code_fmsr_d, jit_code_fmsi_d, + jit_code_last_code } jit_code_t; @@ -1304,9 +1317,13 @@ extern void _jit_pushargi_f(jit_state_t*, jit_float32_t); extern void 
_jit_retr_f(jit_state_t*, jit_fpr_t); extern void _jit_reti_f(jit_state_t*, jit_float32_t); extern void _jit_retval_f(jit_state_t*, jit_fpr_t); -extern void _jit_negi_f(jit_state_t*, jit_int32_t, jit_float32_t); -extern void _jit_absi_f(jit_state_t*, jit_int32_t, jit_float32_t); -extern void _jit_sqrti_f(jit_state_t*, jit_int32_t, jit_float32_t); +extern void _jit_negi_f(jit_state_t*, jit_fpr_t, jit_float32_t); +extern void _jit_absi_f(jit_state_t*, jit_fpr_t, jit_float32_t); +extern void _jit_sqrti_f(jit_state_t*, jit_fpr_t, jit_float32_t); +extern void _jit_fmai_f(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); +extern void _jit_fmsi_f(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); extern jit_node_t *_jit_arg_d(jit_state_t*); extern void _jit_getarg_d(jit_state_t*, jit_fpr_t, jit_node_t*); @@ -1317,9 +1334,13 @@ extern void _jit_pushargi_d(jit_state_t*, jit_float64_t); extern void _jit_retr_d(jit_state_t*, jit_fpr_t); extern void _jit_reti_d(jit_state_t*, jit_float64_t); extern void _jit_retval_d(jit_state_t*, jit_fpr_t); -extern void _jit_negi_d(jit_state_t*, jit_int32_t, jit_float64_t); -extern void _jit_absi_d(jit_state_t*, jit_int32_t, jit_float64_t); -extern void _jit_sqrti_d(jit_state_t*, jit_int32_t, jit_float64_t); +extern void _jit_negi_d(jit_state_t*, jit_fpr_t, jit_float64_t); +extern void _jit_absi_d(jit_state_t*, jit_fpr_t, jit_float64_t); +extern void _jit_sqrti_d(jit_state_t*, jit_fpr_t, jit_float64_t); +extern void _jit_fmai_d(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); +extern void _jit_fmsi_d(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); #define jit_get_reg(s) _jit_get_reg(_jit,s) extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t); @@ -1369,6 +1390,10 @@ extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t, extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t, jit_int32_t, jit_int32_t, jit_word_t, jit_word_t); +#define jit_new_node_wqw(c,u,l,h,w) 
_jit_new_node_wqw(_jit,c,u,l,h,w) +extern jit_node_t *_jit_new_node_wqw(jit_state_t*, jit_code_t, + jit_word_t, jit_int32_t, + jit_int32_t, jit_word_t); #define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h) extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t, jit_word_t, jit_word_t, @@ -1376,9 +1401,17 @@ extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t, #define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w) extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t, jit_word_t, jit_word_t, jit_float32_t); +#define jit_new_node_wqf(c,u,l,h,w) _jit_new_node_wqf(_jit,c,u,l,h,w) +extern jit_node_t *_jit_new_node_wqf(jit_state_t*, jit_code_t, + jit_word_t, jit_int32_t, + jit_int32_t, jit_float32_t); #define jit_new_node_wwd(c,u,v,w) _jit_new_node_wwd(_jit,c,u,v,w) extern jit_node_t *_jit_new_node_wwd(jit_state_t*, jit_code_t, jit_word_t, jit_word_t, jit_float64_t); +#define jit_new_node_wqd(c,u,l,h,w) _jit_new_node_wqd(_jit,c,u,l,h,w) +extern jit_node_t *_jit_new_node_wqd(jit_state_t*, jit_code_t, + jit_word_t, jit_int32_t, + jit_int32_t, jit_float64_t); #define jit_new_node_pww(c,u,v,w) _jit_new_node_pww(_jit,c,u,v,w) extern jit_node_t *_jit_new_node_pww(jit_state_t*, jit_code_t, jit_pointer_t, jit_word_t, jit_word_t); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index a55a20f..dde41ba 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -257,8 +257,18 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, (void)jit_new_node_wf(code, u, v); \ jit_synth_inc(); \ } while (0) +#define jit_inc_synth_wqf(name, u, v, w, x) \ + do { \ + (void)jit_new_node_wqf(jit_code_##name, u, v, w, x); \ + jit_synth_inc(); \ + } while (0) #define jit_inc_synth_wd(name, u, v) \ jit_code_inc_synth_wd(jit_code_##name, u, v) +#define jit_inc_synth_wqd(name, u, v, w, x) \ + do { \ + (void)jit_new_node_wqd(jit_code_##name, u, v, w, x); \ + jit_synth_inc(); \ + } while (0) 
#define jit_code_inc_synth_wd(code, u, v) \ do { \ (void)jit_new_node_wd(code, u, v); \ @@ -340,10 +350,11 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_cc_a0_cnd 0x00000100 /* arg1 is a conditinally set register */ #define jit_cc_a1_reg 0x00000200 /* arg1 is a register */ #define jit_cc_a1_chg 0x00000400 /* arg1 is modified */ -#define jit_cc_a1_int 0x00001000 /* arg1 is immediate word */ -#define jit_cc_a1_flt 0x00002000 /* arg1 is immediate float */ -#define jit_cc_a1_dbl 0x00004000 /* arg1 is immediate double */ -#define jit_cc_a1_arg 0x00008000 /* arg1 is an argument node */ +#define jit_cc_a1_int 0x00000800 /* arg1 is immediate word */ +#define jit_cc_a1_flt 0x00001000 /* arg1 is immediate float */ +#define jit_cc_a1_dbl 0x00002000 /* arg1 is immediate double */ +#define jit_cc_a1_arg 0x00004000 /* arg1 is an argument node */ +#define jit_cc_a1_rlh 0x00008000 /* arg1 is a register pair */ #define jit_cc_a2_reg 0x00010000 /* arg2 is a register */ #define jit_cc_a2_chg 0x00020000 /* arg2 is modified */ #define jit_cc_a2_int 0x00100000 /* arg2 is immediate word */ diff --git a/lib/jit_names.c b/lib/jit_names.c index 83ecb44..a6bb023 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -287,4 +287,8 @@ static char *code_name[] = { "unstr_x", "unsti_x", "movi_w_f", "movi_w_d", "movi_ww_d", + "fmar_f", "fmai_f", + "fmsr_f", "fmsi_f", + "fmar_d", "fmai_d", + "fmsr_d", "fmsi_d", }; diff --git a/lib/jit_print.c b/lib/jit_print.c index 3113255..bd920c2 100644 --- a/lib/jit_print.c +++ b/lib/jit_print.c @@ -126,7 +126,8 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) value = jit_classify(node->code) & (jit_cc_a0_int|jit_cc_a0_flt|jit_cc_a0_dbl|jit_cc_a0_jmp| jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_arg| - jit_cc_a1_reg|jit_cc_a1_int|jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg| + jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a1_int| + jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg| jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl|jit_cc_a2_rlh); if 
(!(node->flag & jit_flag_synth) && ((value & jit_cc_a0_jmp) || node->code == jit_code_finishr || @@ -273,6 +274,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) print_str(" ("); print_int(node->w.q.l); print_chr(' '); print_int(node->w.q.h); print_str(") "); return; + r_q_r: + print_chr(' '); print_reg(node->u.w); + print_str(" ("); print_reg(node->v.q.l); + print_chr(' '); print_reg(node->v.q.h); + print_str(") "); print_reg(node->w.w); + return; r_r_f: print_chr(' '); print_reg(node->u.w); print_chr(' '); print_reg(node->v.w); @@ -282,6 +289,16 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) else print_flt(node->w.f); return; + r_q_f: + print_chr(' '); print_reg(node->u.w); + print_str(" ("); print_reg(node->v.q.l); + print_chr(' '); print_reg(node->v.q.h); + print_str(") "); + if (node->flag & jit_flag_data) + print_flt(*(jit_float32_t *)node->w.n->u.w); + else + print_flt(node->w.f); + return; r_r_d: print_chr(' '); print_reg(node->u.w); print_chr(' '); print_reg(node->v.w); @@ -291,6 +308,16 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) else print_flt(node->w.d); return; + r_q_d: + print_chr(' '); print_reg(node->u.w); + print_str(" ("); print_reg(node->v.q.l); + print_chr(' '); print_reg(node->v.q.h); + print_str(") "); + if (node->flag & jit_flag_data) + print_flt(*(jit_float64_t *)node->w.n->u.w); + else + print_flt(node->w.d); + return; w_r_r: print_chr(' '); print_hex(node->u.w); print_chr(' '); print_reg(node->v.w); @@ -441,6 +468,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) goto n_r_f; case jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_dbl: goto n_r_d; + case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_reg: + goto r_q_r; + case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_flt: + goto r_q_f; + case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl: + goto r_q_d; default: abort(); } diff --git a/lib/lightning.c b/lib/lightning.c index 380c54d..19163ba 100644 --- a/lib/lightning.c +++ b/lib/lightning.c 
@@ -1172,6 +1172,21 @@ _jit_new_node_qww(jit_state_t *_jit, jit_code_t code, } jit_node_t * +_jit_new_node_wqw(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_int32_t l, + jit_int32_t h, jit_word_t w) +{ + jit_node_t *node = new_node(code); + assert(!_jitc->realize); + assert(l != h); + node->u.w = u; + node->v.q.l = l; + node->v.q.h = h; + node->w.w = w; + return (link_node(node)); +} + +jit_node_t * _jit_new_node_wwq(jit_state_t *_jit, jit_code_t code, jit_word_t u, jit_word_t v, jit_int32_t l, jit_int32_t h) @@ -1198,6 +1213,21 @@ _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code, } jit_node_t * +_jit_new_node_wqf(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_int32_t l, + jit_int32_t h, jit_float32_t w) +{ + jit_node_t *node = new_node(code); + assert(!_jitc->realize); + assert(l != h); + node->u.w = u; + node->v.q.l = l; + node->v.q.h = h; + node->w.f = w; + return (link_node(node)); +} + +jit_node_t * _jit_new_node_wwd(jit_state_t *_jit, jit_code_t code, jit_word_t u, jit_word_t v, jit_float64_t w) { @@ -1210,6 +1240,21 @@ _jit_new_node_wwd(jit_state_t *_jit, jit_code_t code, } jit_node_t * +_jit_new_node_wqd(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_int32_t l, + jit_int32_t h, jit_float64_t w) +{ + jit_node_t *node = new_node(code); + assert(!_jitc->realize); + assert(l != h); + node->u.w = u; + node->v.q.l = l; + node->v.q.h = h; + node->w.d = w; + return (link_node(node)); +} + +jit_node_t * _jit_new_node_pww(jit_state_t *_jit, jit_code_t code, jit_pointer_t u, jit_word_t v, jit_word_t w) { @@ -1667,6 +1712,19 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_exti: case jit_code_exti_u: case jit_code_depi: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|jit_cc_a2_rlh; break; + case jit_code_fmar_f: case jit_code_fmar_d: + case jit_code_fmsr_f: case jit_code_fmsr_d: + mask = jit_cc_a0_reg|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_reg; + break; + case jit_code_fmai_f: case jit_code_fmsi_f: + 
mask = jit_cc_a0_reg|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_flt; + break; + case jit_code_fmai_d: case jit_code_fmsi_d: + mask = jit_cc_a0_reg|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl; + break; default: abort(); } @@ -2097,13 +2155,22 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) } } } - if ((value & jit_cc_a1_reg) && !(node->v.w & jit_regno_patch)) { - if (value & jit_cc_a1_chg) { - jit_regset_clrbit(&_jitc->reglive, node->v.w); - jit_regset_setbit(&_jitc->regmask, node->v.w); + if (value & jit_cc_a1_reg) { + if (value & jit_cc_a1_rlh) { + /* Assume registers are not changed */ + if (!(node->v.q.l & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->v.q.l); + if (!(node->v.q.h & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->v.q.h); + } + else if (!(node->v.w & jit_regno_patch)) { + if (value & jit_cc_a1_chg) { + jit_regset_clrbit(&_jitc->reglive, node->v.w); + jit_regset_setbit(&_jitc->regmask, node->v.w); + } + else + jit_regset_setbit(&_jitc->reglive, node->v.w); } - else - jit_regset_setbit(&_jitc->reglive, node->v.w); } if (value & jit_cc_a2_reg) { if (value & jit_cc_a2_rlh) { @@ -2151,8 +2218,14 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) else jit_regset_setbit(&_jitc->regarg, jit_regno(node->u.w)); } - if (value & jit_cc_a1_reg) - jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w)); + if (value & jit_cc_a1_reg) { + if (value & jit_cc_a1_rlh) { + jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.q.l)); + jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.q.h)); + } + else + jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w)); + } if (value & jit_cc_a2_reg) { if (value & jit_cc_a2_rlh) { jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.l)); @@ -2190,8 +2263,14 @@ _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) else jit_regset_clrbit(&_jitc->regarg, jit_regno(node->u.w)); } - if (value & jit_cc_a1_reg) - 
jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w)); + if (value & jit_cc_a1_reg) { + if (value & jit_cc_a1_rlh) { + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.q.l)); + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.q.h)); + } + else + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w)); + } if (value & jit_cc_a2_reg) { if (value & jit_cc_a2_rlh) { jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l)); @@ -2576,21 +2655,51 @@ _jit_setup(jit_state_t *_jit, jit_block_t *block) /* Check argument registers in reverse order to properly * handle registers that are both, argument and result */ value = jit_classify(node->code); - if ((value & jit_cc_a2_reg) && - !(node->w.w & jit_regno_patch) && - jit_regset_tstbit(&block->regmask, node->w.w)) { + if (value & jit_cc_a2_reg) { live = !(value & jit_cc_a2_chg); - jit_regset_clrbit(&block->regmask, node->w.w); - if (live) - jit_regset_setbit(&block->reglive, node->w.w); + if (value & jit_cc_a2_rlh) { + /* Assume will not modify a pair in second argument */ + assert(live); + if (!(node->w.q.l & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->w.q.l)) { + jit_regset_clrbit(&block->regmask, node->w.q.l); + } + if (!(node->w.q.h & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->w.q.h)) { + jit_regset_clrbit(&block->regmask, node->w.q.h); + } + } + else { + if (!(node->w.w & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->w.w)) { + jit_regset_clrbit(&block->regmask, node->w.w); + if (live) + jit_regset_setbit(&block->reglive, node->w.w); + } + } } - if ((value & jit_cc_a1_reg) && - !(node->v.w & jit_regno_patch) && - jit_regset_tstbit(&block->regmask, node->v.w)) { + if (value & jit_cc_a1_reg) { live = !(value & jit_cc_a1_chg); - jit_regset_clrbit(&block->regmask, node->v.w); - if (live) - jit_regset_setbit(&block->reglive, node->v.w); + if (value & jit_cc_a1_rlh) { + /* Assume will not modify a pair in second argument */ + assert(live); + if (!(node->v.q.l & 
jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->v.q.l)) { + jit_regset_clrbit(&block->regmask, node->v.q.l); + } + if (!(node->v.q.h & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->v.q.h)) { + jit_regset_clrbit(&block->regmask, node->v.q.h); + } + } + else { + if (!(node->v.w & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->v.w)) { + jit_regset_clrbit(&block->regmask, node->v.w); + if (live) + jit_regset_setbit(&block->reglive, node->v.w); + } + } } if (value & jit_cc_a0_reg) { live = !(value & jit_cc_a0_chg); @@ -2695,23 +2804,34 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block) } } else { - if (value & jit_cc_a2_reg) { - if (!(node->w.w & jit_regno_patch)) { - if (jit_regset_tstbit(®mask, node->w.w)) { - jit_regset_clrbit(®mask, node->w.w); - if (!(value & jit_cc_a2_chg)) - jit_regset_setbit(®live, node->w.w); - } + if (!(node->w.w & jit_regno_patch)) { + if (jit_regset_tstbit(®mask, node->w.w)) { + jit_regset_clrbit(®mask, node->w.w); + if (!(value & jit_cc_a2_chg)) + jit_regset_setbit(®live, node->w.w); } } } } if (value & jit_cc_a1_reg) { - if (!(node->v.w & jit_regno_patch)) { - if (jit_regset_tstbit(®mask, node->v.w)) { - jit_regset_clrbit(®mask, node->v.w); - if (!(value & jit_cc_a1_chg)) - jit_regset_setbit(®live, node->v.w); + if (value & jit_cc_a1_rlh) { + if (!(node->v.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(®mask, node->v.q.l)) + jit_regset_clrbit(®mask, node->v.q.l); + } + if (!(node->v.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(®mask, node->v.q.h)) + jit_regset_clrbit(®mask, node->v.q.h); + } + } + else { + if (!(node->v.w & jit_regno_patch)) { + if (jit_regset_tstbit(®mask, node->v.w)) { + jit_regset_clrbit(®mask, node->v.w); + if (!(value & jit_cc_a1_chg)) + jit_regset_setbit(®live, node->v.w); + } } } } @@ -2871,11 +2991,24 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, } } if (value & jit_cc_a1_reg) { - if (!(node->v.w & 
jit_regno_patch)) { - if (jit_regset_tstbit(mask, node->v.w)) { - jit_regset_clrbit(mask, node->v.w); - if (!(value & jit_cc_a1_chg)) - jit_regset_setbit(live, node->v.w); + if (value & jit_cc_a1_rlh) { + if (!(node->v.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(mask, node->v.q.l)) + jit_regset_clrbit(mask, node->v.q.l); + } + if (!(node->v.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->v.q.h)) + jit_regset_clrbit(mask, node->v.q.h); + } + } + else { + if (!(node->v.w & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->v.w)) { + jit_regset_clrbit(mask, node->v.w); + if (!(value & jit_cc_a1_chg)) + jit_regset_setbit(live, node->v.w); + } } } } @@ -3756,9 +3889,24 @@ _simplify(jit_state_t *_jit) } } if (info & jit_cc_a1_chg) { - regno = jit_regno(node->v.w); - _jitc->values[regno].kind = 0; - ++_jitc->gen[regno]; +#if 0 + /* Assume registers are not changed */ + if (info & jit_cc_a1_rlh) { + regno = jit_regno(node->v.q.l); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + regno = jit_regno(node->v.q.h); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + } + else { +#endif + regno = jit_regno(node->v.w); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; +#if 0 + } +#endif } if (info & jit_cc_a2_chg) { #if 0 @@ -3982,8 +4130,18 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, node->u.w = patch; } } - if ((value & jit_cc_a1_reg) && node->v.w == regno) - node->v.w = patch; + if (value & jit_cc_a1_reg) { + if (value & jit_cc_a1_rlh) { + if (node->v.q.l == regno) + node->v.q.l = patch; + if (node->v.q.h == regno) + node->v.q.h = patch; + } + else { + if (node->v.w == regno) + node->v.w = patch; + } + } if (value & jit_cc_a2_reg) { if (value & jit_cc_a2_rlh) { if (node->w.q.l == regno) @@ -4384,7 +4542,7 @@ _movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) #endif void -_jit_negi_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v) 
+_jit_negi_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v) { jit_inc_synth_wf(negi_f, u, v); jit_movi_f(u, v); @@ -4393,7 +4551,7 @@ _jit_negi_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v) } void -_jit_absi_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v) +_jit_absi_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v) { jit_inc_synth_wf(absi_f, u, v); jit_movi_f(u, v); @@ -4402,7 +4560,7 @@ _jit_absi_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v) } void -_jit_sqrti_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v) +_jit_sqrti_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v) { jit_inc_synth_wf(sqrti_f, u, v); jit_movi_f(u, v); @@ -4411,7 +4569,45 @@ _jit_sqrti_f(jit_state_t *_jit, jit_int32_t u, jit_float32_t v) } void -_jit_negi_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v) +_jit_fmai_f(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x) +{ + jit_int32_t y; + jit_inc_synth_wqf(fmai_f, u, v, w, x); + if (u != v && u != w) { + jit_movi_f(u, x); + jit_fmar_f(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_f(y, x); + jit_fmar_f(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fmsi_f(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x) +{ + jit_int32_t y; + jit_inc_synth_wqf(fmsi_f, u, v, w, x); /* tag the synthesized sequence with this instruction's own code (fmsi_f, not fmai_f) */ + if (u != v && u != w) { + jit_movi_f(u, x); + jit_fmsr_f(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_f(y, x); + jit_fmsr_f(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_negi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v) { jit_inc_synth_wd(negi_d, u, v); jit_movi_d(u, v); @@ -4420,7 +4616,7 @@ _jit_negi_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v) } void -_jit_absi_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v) +_jit_absi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v) { jit_inc_synth_wd(absi_d, u, v); jit_movi_d(u, v); @@ -4429,7 +4625,7 @@ _jit_absi_d(jit_state_t *_jit, 
jit_int32_t u, jit_float64_t v) } void -_jit_sqrti_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v) +_jit_sqrti_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v) { jit_inc_synth_wd(sqrti_d, u, v); jit_movi_d(u, v); @@ -4437,6 +4633,44 @@ _jit_sqrti_d(jit_state_t *_jit, jit_int32_t u, jit_float64_t v) jit_dec_synth(); } +void +_jit_fmai_d(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x) +{ + jit_int32_t y; + jit_inc_synth_wqd(fmai_d, u, v, w, x); + if (u != v && u != w) { + jit_movi_d(u, x); + jit_fmar_d(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_d(y, x); + jit_fmar_d(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fmsi_d(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x) +{ + jit_int32_t y; + jit_inc_synth_wqd(fmsi_d, u, v, w, x); /* tag the synthesized sequence with this instruction's own code (fmsi_d, not fmai_d) */ + if (u != v && u != w) { + jit_movi_d(u, x); + jit_fmsr_d(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_d(y, x); + jit_fmsr_d(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + static void _cloi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { |