mips: Implement fma* and fms* - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
author pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023年04月18日 17:50:42 -0300
committer pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023年04月18日 17:50:42 -0300
commit 00cfdd1da85d4e085cc79cba6bbdc262eda76287 (patch)
tree e323b4d7a5ba92960d12aee17ec0f77b86f76d62
parent 9743bdf79d805ad36a3ed1aa2456d5dba8a64706 (diff)
download lightning-00cfdd1da85d4e085cc79cba6bbdc262eda76287.tar.gz
mips: Implement fma* and fms*
Diffstat
-rw-r--r--lib/jit_mips-cpu.c 32
-rw-r--r--lib/jit_mips-fpu.c 187
-rw-r--r--lib/jit_mips.c 20
3 files changed, 225 insertions, 14 deletions
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 596ff62..24afbb2 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -27,6 +27,7 @@ typedef union {
struct { jit_uint32_t _:26; jit_uint32_t b : 6; } hc;
struct { jit_uint32_t _:21; jit_uint32_t b : 5; } rs;
struct { jit_uint32_t _:21; jit_uint32_t b : 5; } fm;
+ struct { jit_uint32_t _:21; jit_uint32_t b : 5; } fr;
struct { jit_uint32_t _:18; jit_uint32_t b : 3; } pD;
struct { jit_uint32_t _:19; jit_uint32_t b : 2; } pW;
struct { jit_uint32_t _:16; jit_uint32_t b : 5; } rt;
@@ -47,6 +48,7 @@ typedef union {
struct { jit_uint32_t b : 6; } hc;
struct { jit_uint32_t _: 6; jit_uint32_t b : 5; } rs;
struct { jit_uint32_t _: 6; jit_uint32_t b : 5; } fm;
+ struct { jit_uint32_t _: 6; jit_uint32_t b : 5; } fr;
struct { jit_uint32_t _:11; jit_uint32_t b : 3; } pD;
struct { jit_uint32_t _:11; jit_uint32_t b : 2; } pW;
struct { jit_uint32_t _:11; jit_uint32_t b : 5; } rt;
@@ -996,7 +998,7 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
jit_int32_t reg0, jit_int32_t reg1)
{
jit_instr_t i;
- jit_int32_t reg, r0, r1, r2, regs[3];
+ jit_int32_t reg, r0, r1, r2/*, xreg*/, regs[3];
/* If will emit a pending instruction */
if (_jitc->inst.pend)
i.op = _jitc->inst.op;
@@ -1006,7 +1008,7 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
/* Else, a nop */
else
i.op = 0;
- regs[0] = regs[1] = regs[2] = -1;
+ regs[0] = regs[1] = regs[2]/* = xreg*/ = -1;
switch (i.hc.b) {
case MIPS_SPECIAL: /* 00 */
switch (i.tc.b) {
@@ -1296,6 +1298,9 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
goto three_fprs;
}
break;
+ case MIPS_MADDF: /* 18 */
+ case MIPS_MSUBF: /* 19 */
+ assert(jit_mips6_p());
case MIPS_SUB_fmt: /* 01 */
case MIPS_MUL_fmt: /* 02 */
case MIPS_DIV_fmt: /* 03 */
@@ -1436,6 +1441,29 @@ _jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
break;
}
break;
+ case MIPS_COP1X: /* 13 */
+ switch (i.tc.b) {
+ case MIPS_MADD_fmt_S:
+ case MIPS_MADD_fmt_D:
+ case MIPS_MSUB_fmt_S:
+ case MIPS_MSUB_fmt_D:
+ assert(!jit_mips6_p());
+ if (mask & jit_class_gpr)
+ regs[0] = regs[1] = regs[2] = 0;
+ else {
+ regs[0] = i.ft.b;
+ regs[1] = i.fs.b;
+ regs[2] = i.fd.b;
+ /* FIXME No need to compute and check it.
+ * If asking for a temporary fpr, code will
+ * be flushed. */
+ /* xreg = i.fr.b; */
+ }
+ break;
+ default:
+ abort();
+ }
+ break;
case MIPS_DAUI: /* JALX */ /* 1d */
/* Do not generate JALX. No microMIPS64 or MIPS16e support */
assert(jit_mips6_p() && i.rs.b != 0);
diff --git a/lib/jit_mips-fpu.c b/lib/jit_mips-fpu.c
index 7484af5..9c4b620 100644
--- a/lib/jit_mips-fpu.c
+++ b/lib/jit_mips-fpu.c
@@ -53,6 +53,8 @@
# define MIPS_FLOOR_fmt_W 0x0f
# define MIPS_RECIP 0x15
# define MIPS_RSQRT 0x16
+# define MIPS_MADDF 0x18
+# define MIPS_MSUBF 0x19
# define MIPS_ALNV_PS 0x1e
# define MIPS_CVT_fmt_S 0x20
# define MIPS_CVT_fmt_D 0x21
@@ -62,18 +64,18 @@
# define MIPS_PLU 0x2d
# define MIPS_PUL 0x2e
# define MIPS_PUU 0x2f
-# define MIPS_MADD_fmt_S (0x20 | MIPS_fmt_S)
-# define MIPS_MADD_fmt_D (0x20 | MIPS_fmt_D)
-# define MIPS_MADD_fmt_PS (0x20 | MIPS_fmt_PS)
-# define MIPS_MSUB_fmt_S (0x28 | MIPS_fmt_S)
-# define MIPS_MSUB_fmt_D (0x28 | MIPS_fmt_D)
-# define MIPS_MSUB_fmt_PS (0x28 | MIPS_fmt_PS)
-# define MIPS_NMADD_fmt_S (0x30 | MIPS_fmt_S)
-# define MIPS_NMADD_fmt_D (0x30 | MIPS_fmt_D)
-# define MIPS_NMADD_fmt_PS (0x30 | MIPS_fmt_PS)
-# define MIPS_NMSUB_fmt_S (0x38 | MIPS_fmt_S)
-# define MIPS_NMSUB_fmt_D (0x38 | MIPS_fmt_D)
-# define MIPS_NMSUB_fmt_PS (0x38 | MIPS_fmt_PS)
+# define MIPS_MADD_fmt_S 0x20 /* COP1X function field: 3-bit operation (4 = MADD) above a 3-bit format code */
+# define MIPS_MADD_fmt_D 0x21 /* 3-bit format codes: S = 0, D = 1, PS = 6 */
+# define MIPS_MADD_fmt_PS 0x26 /* was 0x22; format code 2 is not paired-single */
+# define MIPS_MSUB_fmt_S 0x28 /* operation 5 = MSUB */
+# define MIPS_MSUB_fmt_D 0x29
+# define MIPS_MSUB_fmt_PS 0x2e
+# define MIPS_NMADD_fmt_S 0x30 /* operation 6 = NMADD */
+# define MIPS_NMADD_fmt_D 0x31
+# define MIPS_NMADD_fmt_PS 0x36
+# define MIPS_NMSUB_fmt_S 0x38 /* operation 7 = NMSUB */
+# define MIPS_NMSUB_fmt_D 0x39
+# define MIPS_NMSUB_fmt_PS 0x3e
# define MIPS_cond_F 0x30
# define MIPS_cond_UN 0x31
# define MIPS_cond_EQ 0x32
@@ -144,6 +146,14 @@
# define TRUNC_L_D(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_TRUNC_fmt_L)
# define TRUNC_W_S(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_TRUNC_fmt_W)
# define TRUNC_W_D(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_TRUNC_fmt_W)
+# define MADD_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MADD_fmt_S) /* COP1X form: fr is passed in the slot where the COP1 forms pass the format code */
+# define MADD_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MADD_fmt_D) /* double-precision variant of MADD_S */
+# define MSUB_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MSUB_fmt_S) /* same operand layout as MADD_S, MSUB function code */
+# define MSUB_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MSUB_fmt_D) /* double-precision variant of MSUB_S */
+# define MADDF_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MADDF) /* three-operand COP1 form; no separate fr operand */
+# define MADDF_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MADDF) /* double-precision variant of MADDF_S */
+# define MSUBF_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MSUBF) /* three-operand COP1 form, MSUBF function code */
+# define MSUBF_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MSUBF) /* double-precision variant of MSUBF_S */
# define LWC1(rt, of, rb) hrri(MIPS_LWC1, rb, rt, of)
# define SWC1(rt, of, rb) hrri(MIPS_SWC1, rb, rt, of)
# define LDC1(rt, of, rb) hrri(MIPS_LDC1, rb, rt, of)
@@ -313,6 +323,12 @@ static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
static void _unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
#define unsti_x(i0, r0, i1) _unsti_x(_jit, i0, r0, i1)
static void _unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+# define fmar_f(r0, r1, r2, r3) _fmar_f(_jit, r0, r1, r2, r3)
+static void _fmar_f(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define fmsr_f(r0, r1, r2, r3) _fmsr_f(_jit, r0, r1, r2, r3)
+static void _fmsr_f(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
# define movr_f(r0, r1) _movr_f(_jit, r0, r1)
static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
# define movi_f(r0, i0) _movi_f(_jit, r0, i0)
@@ -360,6 +376,12 @@ static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define stxi_d(i0, r0, r1) _stxi_d(_jit, i0, r0, r1)
static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+# define fmar_d(r0, r1, r2, r3) _fmar_d(_jit, r0, r1, r2, r3)
+static void _fmar_d(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+# define fmsr_d(r0, r1, r2, r3) _fmsr_d(_jit, r0, r1, r2, r3)
+static void _fmsr_d(jit_state_t*,
+ jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
# define movr_d(r0, r1) _movr_d(_jit, r0, r1)
static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define movi_d(r0, i0) _movi_d(_jit, r0, i0)
@@ -897,6 +919,77 @@ _unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
fallback_unsti_x(i0, r0, i1);
}
+
+static void /* Emit single-precision r0 = r1 * r2 + r3 (spelled out by the mulr_f/addr_f fallback below). */
+_fmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0; /* scratch fpr obtained from jit_get_reg(); rn(t0) is what gets passed as the operand */
+ if (jit_mips2_p()) { /* a multiply-add instruction is only emitted when jit_mips2_p() holds */
+ if (jit_mips6_p()) { /* use the three-operand MADDF form */
+ if (r0 == r3) /* destination already holds the addend: operate on r0 directly */
+ MADDF_S(r0, r2, r1);
+ else { /* presumably MADDF accumulates into its fd operand, hence r3 is staged in a scratch -- TODO confirm against the ISA manual */
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_f(rn(t0), r3);
+ MADDF_S(rn(t0), r2, r1);
+ movr_f(r0, rn(t0)); /* copy the result into the requested destination */
+ jit_unget_reg(t0);
+ }
+ }
+ else /* four-operand COP1X form: r3 is passed as the fr operand */
+ MADD_S(r0, r3, r2, r1);
+ }
+ else { /* no multiply-add instruction: separate multiply and add */
+ if (r0 != r3) { /* r3 is not overwritten by the multiply, so r0 can hold the product */
+ mulr_f(r0, r1, r2);
+ addr_f(r0, r0, r3);
+ }
+ else { /* r0 aliases r3: multiply into a scratch so r3 is still intact for the add */
+ t0 = jit_get_reg(jit_class_fpr);
+ mulr_f(rn(t0), r1, r2);
+ addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void /* Emit single-precision r0 = r1 * r2 - r3 (spelled out by the mulr_f/subr_f fallback below). */
+_fmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0; /* scratch fpr obtained from jit_get_reg(); rn(t0) is what gets passed as the operand */
+ if (jit_mips2_p()) { /* a multiply-subtract instruction is only emitted when jit_mips2_p() holds */
+ if (jit_mips6_p()) { /* use the three-operand MSUBF form */
+ /* fd = fd - (fs * ft) */
+ if (r0 == r3) /* destination already holds r3: operate on r0 directly */
+ MSUBF_S(r0, r2, r1);
+ else { /* stage r3 in a scratch, since MSUBF reads and writes its fd operand */
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_f(rn(t0), r3);
+ MSUBF_S(rn(t0), r2, r1);
+ movr_f(r0, rn(t0)); /* copy the result into the requested destination */
+ jit_unget_reg(t0);
+ }
+ negr_f(r0, r0); /* MSUBF left r3 - r1 * r2 in r0; negate to get r1 * r2 - r3 */
+ }
+ else /* four-operand COP1X form: r3 is passed as the fr operand */
+ MSUB_S(r0, r3, r2, r1);
+ }
+ else { /* no multiply-subtract instruction: separate multiply and subtract */
+ if (r0 != r3) { /* r3 is not overwritten by the multiply, so r0 can hold the product */
+ mulr_f(r0, r1, r2);
+ subr_f(r0, r0, r3);
+ }
+ else { /* r0 aliases r3: multiply into a scratch so r3 is still intact for the subtract */
+ t0 = jit_get_reg(jit_class_fpr);
+ mulr_f(rn(t0), r1, r2);
+ subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
static void
_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
@@ -1214,6 +1307,76 @@ _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
}
static void /* Emit double-precision r0 = r1 * r2 + r3 (spelled out by the mulr_d/addr_d fallback below). */
+_fmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0; /* scratch fpr obtained from jit_get_reg(); rn(t0) is what gets passed as the operand */
+ if (jit_mips2_p()) { /* a multiply-add instruction is only emitted when jit_mips2_p() holds */
+ if (jit_mips6_p()) { /* use the three-operand MADDF form */
+ if (r0 == r3) /* destination already holds the addend: operate on r0 directly */
+ MADDF_D(r0, r2, r1);
+ else { /* presumably MADDF accumulates into its fd operand, hence r3 is staged in a scratch -- TODO confirm against the ISA manual */
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_d(rn(t0), r3);
+ MADDF_D(rn(t0), r2, r1);
+ movr_d(r0, rn(t0)); /* copy the result into the requested destination */
+ jit_unget_reg(t0);
+ }
+ }
+ else /* four-operand COP1X form: r3 is passed as the fr operand */
+ MADD_D(r0, r3, r2, r1);
+ }
+ else { /* no multiply-add instruction: separate multiply and add */
+ if (r0 != r3) { /* r3 is not overwritten by the multiply, so r0 can hold the product */
+ mulr_d(r0, r1, r2);
+ addr_d(r0, r0, r3);
+ }
+ else { /* r0 aliases r3: multiply into a scratch so r3 is still intact for the add */
+ t0 = jit_get_reg(jit_class_fpr);
+ mulr_d(rn(t0), r1, r2);
+ addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void /* Emit double-precision r0 = r1 * r2 - r3 (spelled out by the mulr_d/subr_d fallback below). */
+_fmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t t0; /* scratch fpr obtained from jit_get_reg(); rn(t0) is what gets passed as the operand */
+ if (jit_mips2_p()) { /* a multiply-subtract instruction is only emitted when jit_mips2_p() holds */
+ if (jit_mips6_p()) { /* use the three-operand MSUBF form */
+ /* fd = fd - (fs * ft) */
+ if (r0 == r3) /* destination already holds r3: operate on r0 directly */
+ MSUBF_D(r0, r2, r1);
+ else { /* stage r3 in a scratch, since MSUBF reads and writes its fd operand */
+ t0 = jit_get_reg(jit_class_fpr);
+ movr_d(rn(t0), r3);
+ MSUBF_D(rn(t0), r2, r1);
+ movr_d(r0, rn(t0)); /* copy the result into the requested destination */
+ jit_unget_reg(t0);
+ }
+ negr_d(r0, r0); /* MSUBF left r3 - r1 * r2 in r0; negate to get r1 * r2 - r3 */
+ }
+ else /* four-operand COP1X form: r3 is passed as the fr operand */
+ MSUB_D(r0, r3, r2, r1);
+ }
+ else { /* no multiply-subtract instruction: separate multiply and subtract */
+ if (r0 != r3) { /* r3 is not overwritten by the multiply, so r0 can hold the product */
+ mulr_d(r0, r1, r2);
+ subr_d(r0, r0, r3);
+ }
+ else { /* r0 aliases r3: multiply into a scratch so r3 is still intact for the subtract */
+ t0 = jit_get_reg(jit_class_fpr);
+ mulr_d(rn(t0), r1, r2);
+ subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (r0 != r1)
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 3e1c625..17fd531 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1800,6 +1800,16 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _f);
case_rr(neg, _f);
case_rr(sqrt, _f);
+ case jit_code_fmar_f:
+ fmar_f(rn(node->u.w), rn(node->v.q.l),
+ rn(node->v.q.h), rn(node->w.w));
+ case jit_code_fmai_f:
+ break;
+ case jit_code_fmsr_f:
+ fmsr_f(rn(node->u.w), rn(node->v.q.l),
+ rn(node->v.q.h), rn(node->w.w));
+ case jit_code_fmsi_f:
+ break;
case_rr(ext, _f);
case_rr(ld, _f);
case_rw(ld, _f);
@@ -1895,6 +1905,16 @@ _emit_code(jit_state_t *_jit)
case_rr(abs, _d);
case_rr(neg, _d);
case_rr(sqrt, _d);
+ case jit_code_fmar_d:
+ fmar_d(rn(node->u.w), rn(node->v.q.l),
+ rn(node->v.q.h), rn(node->w.w));
+ case jit_code_fmai_d:
+ break;
+ case jit_code_fmsr_d:
+ fmsr_d(rn(node->u.w), rn(node->v.q.l),
+ rn(node->v.q.h), rn(node->w.w));
+ case jit_code_fmsi_d:
+ break;
case_rr(ext, _d);
case_rr(ld, _d);
case_rw(ld, _d);
generated by cgit v1.2.3 (git 2.39.1) at 2025年09月15日 11:35:07 +0000

AltStyle によって変換されたページ (->オリジナル) /