Add support for Hitachi SH processors - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
path: root/lib/jit_sh-cpu.c
diff options
context:
space:
mode:
author Paul Cercueil <paul@crapouillou.net> 2024-10-23 13:07:09 +0200
committer pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2024-10-23 11:46:34 -0300
commit 808fdde9e81cc1f43fd3ef3b01d24744c18bc123 (patch)
tree 84c6d64283b14abd1b7b6fc0cbacf7f39bff28cb /lib/jit_sh-cpu.c
parent 0203b5703861476f374ab7282c1aa789aab7a393 (diff)
download lightning-808fdde9e81cc1f43fd3ef3b01d24744c18bc123.tar.gz
Add support for Hitachi SH processors
Add support for the SH2, SH3 and SH4 processors from Hitachi.

All tests pass except for ccall, which is affected by a GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115948

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Diffstat (limited to 'lib/jit_sh-cpu.c')
-rw-r--r-- lib/jit_sh-cpu.c 3209
1 files changed, 3209 insertions, 0 deletions
diff --git a/lib/jit_sh-cpu.c b/lib/jit_sh-cpu.c
new file mode 100644
index 0000000..cfb8b26
--- /dev/null
+++ b/lib/jit_sh-cpu.c
@@ -0,0 +1,3209 @@
+/*
+ * Copyright (C) 2022 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paul Cercueil
+ */
+
+#if PROTO
+
+# ifdef __SH4_SINGLE__ /* compile-time FPU configuration derived from compiler-predefined macros */
+# define SH_DEFAULT_FPU_MODE 0 /* NOTE(review): 0/1 presumably mean single/double precision default - confirm at use sites */
+# else
+# define SH_DEFAULT_FPU_MODE 1
+# endif
+
+# ifndef SH_HAS_FPU /* only auto-detected when the build has not already defined it */
+# ifdef __SH_FPU_ANY__
+# define SH_HAS_FPU 1
+# else
+# define SH_HAS_FPU 0
+# endif
+# endif
+
+# ifdef __SH4_SINGLE_ONLY__
+# define SH_SINGLE_ONLY 1 /* NOTE(review): presumably "no double-precision support" - confirm at use sites */
+# else
+# define SH_SINGLE_ONLY 0
+# endif
+
+
+struct jit_instr_ni { /* 16-bit instruction view: 4-bit c, 4-bit n, 8-bit i */
+#if __BYTE_ORDER == __LITTLE_ENDIAN /* declaration order is mirrored between the two byte orders */
+ jit_uint16_t i :8; /* 8-bit field; NOTE(review): presumably immediate or sub-opcode, see the _cni() call sites */
+ jit_uint16_t n :4; /* 4-bit field; the macros pass a register number or a fixed code here */
+ jit_uint16_t c :4; /* 4-bit field; the macros pass the leading code (0x0..0xf) here */
+#else
+ jit_uint16_t c :4;
+ jit_uint16_t n :4;
+ jit_uint16_t i :8;
+#endif
+};
+
+struct jit_instr_nmd { /* 16-bit instruction view: four 4-bit fields c, n, m, d */
+#if __BYTE_ORDER == __LITTLE_ENDIAN /* declaration order is mirrored between the two byte orders */
+ jit_uint16_t d :4; /* 4-bit field; the macros pass a fixed sub-code or a small displacement here */
+ jit_uint16_t m :4; /* 4-bit field; second register operand at the _cnmd() call sites */
+ jit_uint16_t n :4; /* 4-bit field; first register operand at the _cnmd() call sites */
+ jit_uint16_t c :4; /* 4-bit field; leading code */
+#else
+ jit_uint16_t c :4;
+ jit_uint16_t n :4;
+ jit_uint16_t m :4;
+ jit_uint16_t d :4;
+#endif
+};
+
+struct jit_instr_md { /* 16-bit instruction view: 8-bit c, 4-bit m, 4-bit d */
+#if __BYTE_ORDER == __LITTLE_ENDIAN /* declaration order is mirrored between the two byte orders */
+ jit_uint16_t d :4;
+ jit_uint16_t m :4;
+ jit_uint16_t c :8; /* here the code field is 8 bits wide, unlike the other views */
+#else
+ jit_uint16_t c :8;
+ jit_uint16_t m :4;
+ jit_uint16_t d :4;
+#endif
+};
+
+struct jit_instr_d { /* 16-bit instruction view: 4-bit c, 12-bit d */
+#if __BYTE_ORDER == __LITTLE_ENDIAN /* declaration order is mirrored between the two byte orders */
+ jit_uint16_t d :12; /* 12-bit field; BRA/BSR pass their operand here through _cd() */
+ jit_uint16_t c :4;
+#else
+ jit_uint16_t c :4;
+ jit_uint16_t d :12;
+#endif
+};
+
+typedef union { /* one 16-bit instruction, viewable through any field layout or as a raw word */
+ struct jit_instr_ni ni;
+ struct jit_instr_nmd nmd;
+ struct jit_instr_md md;
+ struct jit_instr_d d;
+ jit_uint16_t op; /* the whole instruction as a single 16-bit value */
+} jit_instr_t;
+
+static void _cni(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* emitters behind the encoding macros; NOTE(review): argument order presumably follows the c, n, i fields - confirm in the definition */
+static void
+_cnmd(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* called by the macros as (_jit, code, rn, rm, sub-code or displacement) */
+static void _cmd(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* NOTE(review): not used by any macro in this section */
+static void _cd(jit_state_t*,jit_uint16_t,jit_uint16_t); /* called by BRA/BSR as (_jit, code, operand) */
+
+# define STRB(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x4) /* encodings with leading code 0x0; the last argument selects the operation */
+# define STRW(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x5)
+# define STRL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x6)
+# define MULL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x7)
+# define LDRB(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xc)
+# define LDRW(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xd)
+# define LDRL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xe)
+# define BSRF(rn) _cni(_jit, 0x0, rn, 0x03) /* single-register forms: the 8-bit slot carries a fixed sub-code */
+# define STCGBR(rn) _cni(_jit, 0x0, rn, 0x12)
+# define STSH(rn) _cni(_jit, 0x0, rn, 0x0a)
+# define STSL(rn) _cni(_jit, 0x0, rn, 0x1a)
+# define BRAF(rn) _cni(_jit, 0x0, rn, 0x23)
+# define MOVT(rn) _cni(_jit, 0x0, rn, 0x29)
+
+# define STSPR(rn) _cni(_jit, 0x0, rn, 0x2a) /* NOTE(review): names suggest stores from PR / FPUL / FPSCR - verify against the SH manual */
+# define STSUL(rn) _cni(_jit, 0x0, rn, 0x5a)
+# define STSFP(rn) _cni(_jit, 0x0, rn, 0x6a)
+
+# define STDL(rn, rm, imm) _cnmd(_jit, 0x1, rn, rm, imm) /* leading code 0x1: the 4-bit d slot carries imm instead of a sub-code */
+
+# define STB(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x0) /* leading code 0x2: two-register forms selected by the last argument */
+# define STW(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x1)
+# define STL(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x2)
+# define STBU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x4)
+# define STWU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x5)
+# define STLU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x6)
+# define DIV0S(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x7)
+# define TST(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x8)
+# define AND(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x9)
+# define XOR(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0xa)
+# define OR(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0xb)
+
+# define CMPEQ(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x0) /* leading code 0x3: compare and arithmetic two-register forms */
+# define CMPHS(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x2)
+# define CMPGE(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x3)
+# define DIV1(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x4)
+# define DMULU(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x5)
+# define CMPHI(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x6)
+# define CMPGT(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x7)
+# define SUB(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x8)
+# define SUBC(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xa)
+# define SUBV(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xb)
+# define ADD(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xc)
+# define ADDC(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xe)
+# define ADDV(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xf)
+# define DMULS(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xd) /* listed out of numeric order; sub-code 0xd sits between ADD and ADDC */
+
+# define SHLL(rn) _cni(_jit, 0x4, rn, 0x00) /* leading code 0x4: single-register forms, 8-bit slot carries a fixed sub-code */
+# define SHLR(rn) _cni(_jit, 0x4, rn, 0x01)
+# define ROTL(rn) _cni(_jit, 0x4, rn, 0x04)
+# define ROTR(rn) _cni(_jit, 0x4, rn, 0x05)
+# define SHLL2(rn) _cni(_jit, 0x4, rn, 0x08)
+# define SHLR2(rn) _cni(_jit, 0x4, rn, 0x09)
+# define JSR(rn) _cni(_jit, 0x4, rn, 0x0b)
+# define DT(rn) _cni(_jit, 0x4, rn, 0x10)
+# define CMPPZ(rn) _cni(_jit, 0x4, rn, 0x11)
+# define CMPPL(rn) _cni(_jit, 0x4, rn, 0x15)
+# define SHLL8(rn) _cni(_jit, 0x4, rn, 0x18)
+# define SHLR8(rn) _cni(_jit, 0x4, rn, 0x19)
+# define TAS(rn) _cni(_jit, 0x4, rn, 0x1b)
+# define LDCGBR(rm) _cni(_jit, 0x4, rm, 0x1e) /* parameter is named rm here but fills the same slot as rn elsewhere */
+# define SHAL(rn) _cni(_jit, 0x4, rn, 0x20)
+# define SHAR(rn) _cni(_jit, 0x4, rn, 0x21)
+# define ROTCL(rn) _cni(_jit, 0x4, rn, 0x24)
+# define ROTCR(rn) _cni(_jit, 0x4, rn, 0x25)
+# define SHLL16(rn) _cni(_jit, 0x4, rn, 0x28)
+# define SHLR16(rn) _cni(_jit, 0x4, rn, 0x29)
+# define LDSPR(rn) _cni(_jit, 0x4, rn, 0x2a)
+# define JMP(rn) _cni(_jit, 0x4, rn, 0x2b)
+# define LDS(rn) _cni(_jit, 0x4, rn, 0x5a) /* same sub-code (0x5a) as STSUL under leading code 0x0 */
+# define LDSFP(rn) _cni(_jit, 0x4, rn, 0x6a)
+# define SHAD(rn, rm) _cnmd(_jit, 0x4, rn, rm, 0xc) /* two-register forms under leading code 0x4 */
+# define SHLD(rn, rm) _cnmd(_jit, 0x4, rn, rm, 0xd)
+
+# define LDDL(rn, rm, imm) _cnmd(_jit, 0x5, rn, rm, imm) /* leading code 0x5: the 4-bit d slot carries imm */
+
+# define LDB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x0) /* leading code 0x6: two-register forms selected by the last argument */
+# define LDW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x1)
+# define LDL(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x2)
+# define MOV(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x3)
+# define LDBU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x4)
+# define LDWU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x5)
+# define LDLU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x6)
+# define NOT(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x7)
+# define SWAPB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x8)
+# define SWAPW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x9)
+# define NEGC(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xa)
+# define NEG(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xb)
+# define EXTUB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xc)
+# define EXTUW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xd)
+# define EXTSB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xe)
+# define EXTSW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xf)
+
+# define ADDI(rn, imm) _cni(_jit, 0x7, rn, imm) /* leading code 0x7: register plus a value in the 8-bit slot */
+
+# define LDDB(rm, imm) _cnmd(_jit, 0x8, 0x4, rm, imm) /* leading code 0x8: the n slot holds a fixed sub-code, not a register */
+# define LDDW(rm, imm) _cnmd(_jit, 0x8, 0x5, rm, imm)
+# define CMPEQI(imm) _cni(_jit, 0x8, 0x8, imm)
+# define BT(imm) _cni(_jit, 0x8, 0x9, imm) /* NOTE(review): BT/BF/BTS/BFS presumably take an 8-bit branch displacement - verify against the SH manual */
+# define BF(imm) _cni(_jit, 0x8, 0xb, imm)
+# define BTS(imm) _cni(_jit, 0x8, 0xd, imm)
+# define BFS(imm) _cni(_jit, 0x8, 0xf, imm)
+
+# define LDPW(rn, imm) _cni(_jit, 0x9, rn, imm) /* leading code 0x9 */
+
+# define BRA(imm) _cd(_jit, 0xa, imm) /* leading code 0xa, emitted through the 12-bit operand form */
+
+# define BSR(imm) _cd(_jit, 0xb, imm) /* leading code 0xb, emitted through the 12-bit operand form */
+
+# define GBRSTB(imm) _cni(_jit, 0xc, 0x0, imm) /* leading code 0xc: the n slot holds a fixed sub-code, not a register */
+# define GBRSTW(imm) _cni(_jit, 0xc, 0x1, imm)
+# define GBRSTL(imm) _cni(_jit, 0xc, 0x2, imm)
+# define GBRLDB(imm) _cni(_jit, 0xc, 0x4, imm)
+# define GBRLDW(imm) _cni(_jit, 0xc, 0x5, imm)
+# define GBRLDL(imm) _cni(_jit, 0xc, 0x6, imm)
+# define MOVA(imm) _cni(_jit, 0xc, 0x7, imm)
+# define TSTI(imm) _cni(_jit, 0xc, 0x8, imm)
+# define ANDI(imm) _cni(_jit, 0xc, 0x9, imm)
+# define XORI(imm) _cni(_jit, 0xc, 0xa, imm)
+# define ORI(imm) _cni(_jit, 0xc, 0xb, imm)
+
+# define LDPL(rn, imm) _cni(_jit, 0xd, rn, imm) /* leading code 0xd */
+
+# define MOVI(rn, imm) _cni(_jit, 0xe, rn, imm) /* leading code 0xe */
+
+# define FADD(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x0) /* leading code 0xf: floating-point group, two-register forms first */
+# define FSUB(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x1)
+# define FMUL(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x2)
+# define FDIV(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x3)
+# define FCMPEQ(rn,rm) _cnmd(_jit, 0xf, rn, rm, 0x4)
+# define FCMPGT(rn,rm) _cnmd(_jit, 0xf, rn, rm, 0x5)
+# define LDXF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x6)
+# define STXF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x7)
+# define LDF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x8)
+# define LDFS(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x9)
+# define STF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xa)
+# define STFS(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xb)
+# define FMOV(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xc)
+# define FMAC(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xe)
+# define FSTS(rn) _cni(_jit, 0xf, rn, 0x0d) /* single-register forms: every 8-bit sub-code ends in 0xd */
+# define FLDS(rn) _cni(_jit, 0xf, rn, 0x1d)
+# define FLOAT(rn) _cni(_jit, 0xf, rn, 0x2d)
+# define FTRC(rn) _cni(_jit, 0xf, rn, 0x3d)
+# define FNEG(rn) _cni(_jit, 0xf, rn, 0x4d)
+# define FABS(rn) _cni(_jit, 0xf, rn, 0x5d)
+# define FSQRT(rn) _cni(_jit, 0xf, rn, 0x6d)
+# define FLDI0(rn) _cni(_jit, 0xf, rn, 0x8d)
+# define FLDI1(rn) _cni(_jit, 0xf, rn, 0x9d)
+# define FCNVSD(rn) _cni(_jit, 0xf, rn, 0xad)
+# define FCNVDS(rn) _cni(_jit, 0xf, rn, 0xbd)
+
+# define FMOVXX(rn, rm) FMOV((rn) | 1, (rm) | 1) /* FMOV with bit 0 forced on in a register number; NOTE(review): presumably selects the extended (XD) bank - confirm */
+# define FMOVDX(rn, rm) FMOV((rn) | 0, (rm) | 1) /* bit 0 forced on for rm only */
+# define FMOVXD(rn, rm) FMOV((rn) | 1, (rm) | 0) /* bit 0 forced on for rn only */
+
+# define CLRT() ii(0x8) /* operand-less instructions: each writes one fixed instruction word through ii() */
+# define NOP() ii(0x9)
+# define RTS() ii(0xb)
+# define SETT() ii(0x18)
+# define DIV0U() ii(0x19)
+# define FSCHG() ii(0xf3fd) /* NOTE(review): FSCHG/FRCHG presumably toggle the SZ/FR bits flagged by SZ_FLAG/FR_FLAG - confirm */
+# define FRCHG() ii(0xfbfd)
+
+/* Store the instruction word `i` through _jit->pc.us and post-increment that
+ * pointer. Both the argument and the whole expansion are parenthesized so the
+ * macro stays correct when given a compound expression or when it is used
+ * inside a larger expression (e.g. `x = ii(a) + 1` no longer stores a + 1). */
+# define ii(i) (*_jit->pc.us++ = (i))
+
+# define stack_framesize ((JIT_V_NUM + 2) * 4) /* JIT_V_NUM + 2 slots of 4 bytes; NOTE(review): the two extra slots are presumably other saved registers - confirm in _prolog */
+
+# define PR_FLAG (1 << 19) /* single-bit masks; NOTE(review): positions match the FPSCR PR/SZ/FR bits in the SH-4 manual - confirm usage */
+# define SZ_FLAG (1 << 20)
+# define FR_FLAG (1 << 21)
+
+static void _nop(jit_state_t*,jit_word_t); /* each helper is paired with a macro that supplies the implicit _jit argument */
+# define nop(i0) _nop(_jit,i0)
+static void _movr(jit_state_t*,jit_uint16_t,jit_uint16_t);
+# define movr(r0,r1) _movr(_jit,r0,r1)
+static void _movi(jit_state_t*,jit_uint16_t,jit_word_t);
+# define movi(r0,i0) _movi(_jit,r0,i0)
+static void _movnr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_bool_t);
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2,1) /* movnr and movzr share one helper; the trailing flag is 1 for movnr */
+# define movzr(r0,r1,r2) _movnr(_jit,r0,r1,r2,0) /* ... and 0 for movzr */
+# define casx(r0,r1,r2,r3,i0) _casx(_jit,r0,r1,r2,r3,i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+# define casr(r0,r1,r2,r3) casx(r0,r1,r2,r3,0) /* register-address form: immediate address argument fixed to 0 */
+# define casi(r0,i0,r1,r2) casx(r0,_NOREG,r1,r2,i0) /* immediate-address form: address register slot set to _NOREG */
+static void _addr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* add/subtract helpers: ...r forms take three registers, ...i forms take two registers and a word immediate */
+# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2)
+static void _addcr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define addcr(r0,r1,r2) _addcr(_jit,r0,r1,r2)
+static void _addxr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define addxr(r0,r1,r2) _addxr(_jit,r0,r1,r2)
+static void _addi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define addci(r0,r1,i0) _addci(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define addxi(r0,r1,i0) _addxi(_jit,r0,r1,i0)
+static void _subr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define subr(r0,r1,r2) _subr(_jit,r0,r1,r2)
+static void _subcr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define subcr(r0,r1,r2) _subcr(_jit,r0,r1,r2)
+static void _subxr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define subxr(r0,r1,r2) _subxr(_jit,r0,r1,r2)
+static void _subi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define subi(r0,r1,i0) _subi(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define subci(r0,r1,i0) _subci(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0)
+static void _rsbi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); /* NOTE(review): presumably reverse subtract (i0 - r1) - confirm in the definition */
+# define rsbi(r0,r1,i0) _rsbi(_jit,r0,r1,i0)
+static void _mulr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* multiply helpers; the q... forms take two destination registers */
+# define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2)
+static void _hmulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); /* the hmul* helpers declare registers as jit_int32_t, unlike their neighbours */
+# define hmulr(r0,r1,r2) _hmulr(_jit,r0,r1,r2)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
+static void _hmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define hmulr_u(r0,r1,r2) _hmulr_u(_jit,r0,r1,r2)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
+static void _qmulr(jit_state_t*,jit_uint16_t,jit_uint16_t,
+ jit_uint16_t,jit_uint16_t);
+# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,
+ jit_uint16_t,jit_uint16_t);
+# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3)
+static void _muli(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
+static void _qmuli(jit_state_t*,jit_uint16_t,jit_uint16_t,
+ jit_uint16_t,jit_word_t);
+# define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,jit_uint16_t,jit_uint16_t,
+ jit_uint16_t,jit_word_t);
+# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0)
+static void _divr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* divide and remainder helpers; the q... forms take two destination registers */
+# define divr(r0,r1,r2) _divr(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define divr_u(r0,r1,r2) _divr_u(_jit,r0,r1,r2)
+static void _qdivr(jit_state_t*,jit_uint16_t,jit_uint16_t,
+ jit_uint16_t,jit_uint16_t);
+# define qdivr(r0,r1,r2,r3) _qdivr(_jit,r0,r1,r2,r3)
+static void _qdivr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,
+ jit_uint16_t,jit_uint16_t);
+# define qdivr_u(r0,r1,r2,r3) _qdivr_u(_jit,r0,r1,r2,r3)
+static void _divi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define divi(r0,r1,i0) _divi(_jit,r0,r1,i0)
+# define divi_u(r0,r1,i0) fallback_divi_u(r0,r1,i0) /* no backend helper here: delegates to fallback_divi_u, defined outside this section */
+static void _qdivi(jit_state_t*,jit_uint16_t,jit_uint16_t,
+ jit_uint16_t,jit_word_t);
+# define qdivi(r0,r1,r2,i0) _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,
+ jit_uint16_t,jit_word_t);
+# define qdivi_u(r0,r1,r2,i0) _qdivi_u(_jit,r0,r1,r2,i0)
+static void _remr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2)
+static void _remi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0)
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) /* byte-swap helpers, followed by bit-field and width-extension mappings */
+static void _bswapr_us(jit_state_t*,jit_uint16_t,jit_uint16_t);
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#define extr(r0,r1,i0,i1) fallback_ext(r0,r1,i0,i1) /* extr/extr_u/depr delegate to fallback_* routines defined outside this section */
+#define extr_u(r0,r1,i0,i1) fallback_ext_u(r0,r1,i0,i1)
+#define depr(r0,r1,i0,i1) fallback_dep(r0,r1,i0,i1)
+# define extr_c(r0, r1) EXTSB(r0,r1) /* the four width extensions map directly onto single encoding macros */
+# define extr_s(r0,r1) EXTSW(r0,r1)
+# define extr_uc(r0,r1) EXTUB(r0,r1)
+# define extr_us(r0,r1) EXTUW(r0,r1)
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); /* rotate helpers; only the right rotate has an immediate form */
+# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2)
+static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0)
+/* Left rotate by an immediate, expressed as a right rotate by
+ * __WORDSIZE - i0. i0 is parenthesized so that a compound argument such as
+ * `a - b` is subtracted as a whole instead of expanding to
+ * __WORDSIZE - a - b. */
+# define lroti(r0,r1,i0) rroti(r0,r1,__WORDSIZE-(i0))
+static void _andr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* bitwise logic helpers, then complement/negate and bit-counting helpers */
+# define andr(r0,r1,r2) _andr(_jit,r0,r1,r2)
+static void _andi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0)
+static void _orr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define orr(r0,r1,r2) _orr(_jit,r0,r1,r2)
+static void _ori(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ori(r0,r1,i0) _ori(_jit,r0,r1,i0)
+static void _xorr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define xorr(r0,r1,r2) _xorr(_jit,r0,r1,r2)
+static void _xori(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0)
+# define comr(r0,r1) NOT(r0,r1) /* complement and negate map directly onto single encoding macros */
+# define negr(r0,r1) NEG(r0,r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); /* NOTE(review): clo/clz/cto/ctz presumably count leading/trailing ones/zeros - confirm in the definitions */
+# define clor(r0,r1) _clor(_jit,r0,r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0,r1) _clzr(_jit,r0,r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0,r1) _ctor(_jit,r0,r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0,r1) _ctzr(_jit,r0,r1)
+static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define rbitr(r0, r1) _rbitr(_jit, r0, r1)
+static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define popcntr(r0, r1) _popcntr(_jit, r0, r1)
+static void _gtr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* comparison helpers; register forms only exist natively for gt/ge/eq/ne */
+# define gtr(r0,r1,r2) _gtr(_jit,r0,r1,r2)
+static void _ger(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define ger(r0,r1,r2) _ger(_jit,r0,r1,r2)
+static void _gtr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define gtr_u(r0,r1,r2) _gtr_u(_jit,r0,r1,r2)
+static void _ger_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define ger_u(r0,r1,r2) _ger_u(_jit,r0,r1,r2)
+# define ltr(r0,r1,r2) gtr(r0,r2,r1) /* lt/le register forms reuse gt/ge with the two source operands swapped */
+# define ltr_u(r0,r1,r2) gtr_u(r0,r2,r1)
+# define ler(r0,r1,r2) ger(r0,r2,r1)
+# define ler_u(r0,r1,r2) ger_u(r0,r2,r1)
+static void _eqr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define eqr(r0,r1,r2) _eqr(_jit,r0,r1,r2)
+static void _ner(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define ner(r0,r1,r2) _ner(_jit,r0,r1,r2)
+static void _eqi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); /* immediate forms each have their own helper, including lt/le */
+# define eqi(r0,r1,i0) _eqi(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define nei(r0,r1,i0) _nei(_jit,r0,r1,i0)
+static void _gti(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define gti(r0,r1,i0) _gti(_jit,r0,r1,i0)
+static void _gei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define gei(r0,r1,i0) _gei(_jit,r0,r1,i0)
+static void _gti_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define gti_u(r0,r1,i0) _gti_u(_jit,r0,r1,i0)
+static void _gei_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define gei_u(r0,r1,i0) _gei_u(_jit,r0,r1,i0)
+static void _lti(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define lti(r0,r1,i0) _lti(_jit,r0,r1,i0)
+static void _lei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define lei(r0,r1,i0) _lei(_jit,r0,r1,i0)
+static void _lti_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define lti_u(r0,r1,i0) _lti_u(_jit,r0,r1,i0)
+static void _lei_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define lei_u(r0,r1,i0) _lei_u(_jit,r0,r1,i0)
+static void _lshr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* shift helpers: single-result forms first, then the two-result q... forms */
+# define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2)
+static void _rshr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2)
+static void _rshr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2)
+static void _lshi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
+# define qlshr(r0,r1,r2,r3) _qlshr(_jit,r0,r1,r2,r3)
+static void
+_qlshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define qlshr_u(r0, r1, r2, r3) _qlshr_u(_jit,r0,r1,r2,r3)
+static void
+_qlshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0) /* immediate q-shifts share one helper; the leading flag is 1 for the plain form */
+# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0) /* ... and 0 for the _u form */
+# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0)
+static void
+_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+# define qrshr(r0, r1, r2, r3) _qrshr(_jit,r0,r1,r2,r3)
+static void
+_qrshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define qrshr_u(r0, r1, r2, r3) _qrshr_u(_jit,r0,r1,r2,r3)
+static void
+_qrshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0) /* same flag convention as qlshi/qlshi_u */
+# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0)
+# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0)
+static void
+_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+# define ldr_c(r0,r1) LDB(r0,r1) /* load helpers; these three register-indirect loads map directly onto encoding macros */
+# define ldr_s(r0,r1) LDW(r0,r1)
+# define ldr_i(r0,r1) LDL(r0,r1)
+static void _ldr_uc(jit_state_t*,jit_uint16_t,jit_uint16_t); /* the unsigned register-indirect loads need dedicated helpers */
+# define ldr_uc(r0,r1) _ldr_uc(_jit,r0,r1)
+static void _ldr_us(jit_state_t*,jit_uint16_t,jit_uint16_t);
+# define ldr_us(r0,r1) _ldr_us(_jit,r0,r1)
+static void _ldi_c(jit_state_t*,jit_uint16_t,jit_word_t); /* ldi_*: destination register and a word-sized immediate address */
+# define ldi_c(r0,i0) _ldi_c(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_uint16_t,jit_word_t);
+# define ldi_s(r0,i0) _ldi_s(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_uint16_t,jit_word_t);
+# define ldi_i(r0,i0) _ldi_i(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_uint16_t,jit_word_t);
+# define ldi_uc(r0,i0) _ldi_uc(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_uint16_t,jit_word_t);
+# define ldi_us(r0,i0) _ldi_us(_jit,r0,i0)
+static void _ldxr_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* ldxr_*: three-register forms */
+# define ldxr_c(r0,r1,r2) _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define ldxr_s(r0,r1,r2) _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define ldxr_i(r0,r1,r2) _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define ldxr_uc(r0,r1,r2) _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define ldxr_us(r0,r1,r2) _ldxr_us(_jit,r0,r1,r2)
+static void _ldxi_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); /* ldxi_*: two registers plus a word-sized immediate */
+# define ldxi_c(r0,r1,i0) _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxi_s(r0,r1,i0) _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxi_uc(r0,r1,i0) _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxi_us(r0,r1,i0) _ldxi_us(_jit,r0,r1,i0)
+# define ldxbi_c(r0,r1,i0) generic_ldxbi_c(r0,r1,i0) /* ldxbi_* delegate to generic_* routines defined outside this section */
+# define ldxbi_uc(r0,r1,i0) generic_ldxbi_uc(r0,r1,i0)
+# define ldxbi_s(r0,r1,i0) generic_ldxbi_s(r0,r1,i0)
+# define ldxbi_us(r0,r1,i0) generic_ldxbi_us(r0,r1,i0)
+# define ldxbi_i(r0,r1,i0) generic_ldxbi_i(r0,r1,i0)
+static void _ldxai_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); /* ldxai_* have backend helpers, unlike ldxbi_* */
+# define ldxai_c(r0,r1,i0) _ldxai_c(_jit,r0,r1,i0)
+static void _ldxai_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxai_uc(r0,r1,i0) _ldxai_uc(_jit,r0,r1,i0)
+static void _ldxai_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxai_s(r0,r1,i0) _ldxai_s(_jit,r0,r1,i0)
+static void _ldxai_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxai_us(r0,r1,i0) _ldxai_us(_jit,r0,r1,i0)
+static void _ldxai_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxai_i(r0,r1,i0) _ldxai_i(_jit,r0,r1,i0)
+# define unldr(r0, r1, i0) fallback_unldr(r0, r1, i0) /* the unld* forms delegate to fallback_* routines defined outside this section */
+# define unldi(r0, i0, i1) fallback_unldi(r0, i0, i1)
+# define unldr_u(r0, r1, i0) fallback_unldr_u(r0, r1, i0)
+# define unldi_u(r0, i0, i1) fallback_unldi_u(r0, i0, i1)
+# define str_c(r0,r1) STB(r0,r1) /* store helpers; the register-indirect stores map directly onto encoding macros */
+# define str_s(r0,r1) STW(r0,r1)
+# define str_i(r0,r1) STL(r0,r1)
+static void _sti_c(jit_state_t*,jit_word_t,jit_uint16_t); /* sti_*: word-sized immediate first, then the register */
+# define sti_c(i0,r0) _sti_c(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_uint16_t);
+# define sti_s(i0,r0) _sti_s(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_uint16_t);
+# define sti_i(i0,r0) _sti_i(_jit,i0,r0)
+static void _stxr_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* stxr_*: three-register forms */
+# define stxr_c(r0,r1,r2) _stxr_c(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define stxr_s(r0,r1,r2) _stxr_s(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define stxr_i(r0,r1,r2) _stxr_i(_jit,r0,r1,r2)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); /* stxi_*: word-sized immediate first, then two registers */
+# define stxi_c(i0,r0,r1) _stxi_c(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+# define stxi_s(i0,r0,r1) _stxi_s(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+# define stxi_i(i0,r0,r1) _stxi_i(_jit,i0,r0,r1)
+static void _stxbi_c(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); /* stxbi_* have backend helpers; stxai_* below are generic (the reverse of the loads) */
+# define stxbi_c(i0,r0,r1) _stxbi_c(_jit,i0,r0,r1)
+static void _stxbi_s(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+# define stxbi_s(i0,r0,r1) _stxbi_s(_jit,i0,r0,r1)
+static void _stxbi_i(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+# define stxbi_i(i0,r0,r1) _stxbi_i(_jit,i0,r0,r1)
+# define stxai_c(i0,r0,r1) generic_stxai_c(i0,r0,r1)
+# define stxai_s(i0,r0,r1) generic_stxai_s(i0,r0,r1)
+# define stxai_i(i0,r0,r1) generic_stxai_i(i0,r0,r1)
+# define unstr(r0, r1, i0) fallback_unstr(r0, r1, i0) /* the unst* forms delegate to fallback_* routines defined outside this section */
+# define unsti(i0, r0, i1) fallback_unsti(i0, r0, i1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t,jit_bool_t); /* one helper serves lt/le/gt/ge; the macros choose operand order and the first flag */
+# define bltr(i0,r0,r1) bltr_p(i0,r0,r1,0) /* plain forms pass p = 0 to the corresponding _p form */
+# define bler(i0,r0,r1) bler_p(i0,r0,r1,0)
+# define bgtr(i0,r0,r1) bgtr_p(i0,r0,r1,0)
+# define bger(i0,r0,r1) bger_p(i0,r0,r1,0)
+# define bltr_p(i0,r0,r1,p) _bger(_jit,i0,r0,r1,0,p) /* lt: operands in order, flag 0 */
+# define bler_p(i0,r0,r1,p) _bger(_jit,i0,r1,r0,1,p) /* le: operands swapped, flag 1 */
+# define bgtr_p(i0,r0,r1,p) _bger(_jit,i0,r1,r0,0,p) /* gt: operands swapped, flag 0 */
+# define bger_p(i0,r0,r1,p) _bger(_jit,i0,r0,r1,1,p) /* ge: operands in order, flag 1 */
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t,jit_bool_t); /* _u variant, driven by the same operand-order/flag scheme as _bger */
+# define bltr_u(i0,r0,r1) bltr_u_p(i0,r0,r1,0)
+# define bler_u(i0,r0,r1) bler_u_p(i0,r0,r1,0)
+# define bgtr_u(i0,r0,r1) bgtr_u_p(i0,r0,r1,0)
+# define bger_u(i0,r0,r1) bger_u_p(i0,r0,r1,0)
+# define bltr_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r0,r1,0,p)
+# define bler_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r1,r0,1,p)
+# define bgtr_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r1,r0,0,p)
+# define bger_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r0,r1,1,p)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t); /* eq/ne/ms/mc each have their own helper with only the p flag */
+# define beqr(i0,r0,r1) beqr_p(i0,r0,r1,0)
+# define beqr_p(i0,r0,r1,p) _beqr(_jit,i0,r0,r1,p)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t);
+# define bner(i0,r0,r1) bner_p(i0,r0,r1,0)
+# define bner_p(i0,r0,r1,p) _bner(_jit,i0,r0,r1,p)
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t);
+# define bmsr(i0,r0,r1) bmsr_p(i0,r0,r1,0)
+# define bmsr_p(i0,r0,r1,p) _bmsr(_jit,i0,r0,r1,p)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t);
+# define bmcr(i0,r0,r1) bmcr_p(i0,r0,r1,0)
+# define bmcr_p(i0,r0,r1,p) _bmcr(_jit,i0,r0,r1,p)
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t,jit_bool_t); /* bo... and bx... share a helper: first flag is 1 for bo..., 0 for bx... */
+# define boaddr(i0,r0,r1) boaddr_p(i0,r0,r1,0)
+# define bxaddr(i0,r0,r1) bxaddr_p(i0,r0,r1,0)
+# define boaddr_p(i0,r0,r1,p) _boaddr(_jit,i0,r0,r1,1,p)
+# define bxaddr_p(i0,r0,r1,p) _boaddr(_jit,i0,r0,r1,0,p)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t,jit_bool_t);
+# define boaddr_u(i0,r0,r1) boaddr_u_p(i0,r0,r1,0)
+# define bxaddr_u(i0,r0,r1) bxaddr_u_p(i0,r0,r1,0)
+# define boaddr_u_p(i0,r0,r1,p) _boaddr_u(_jit,i0,r0,r1,1,p)
+# define bxaddr_u_p(i0,r0,r1,p) _boaddr_u(_jit,i0,r0,r1,0,p)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t,jit_bool_t);
+# define bosubr(i0,r0,r1) bosubr_p(i0,r0,r1,0)
+# define bxsubr(i0,r0,r1) bxsubr_p(i0,r0,r1,0)
+# define bosubr_p(i0,r0,r1,p) _bosubr(_jit,i0,r0,r1,1,p)
+# define bxsubr_p(i0,r0,r1,p) _bosubr(_jit,i0,r0,r1,0,p)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_uint16_t,jit_bool_t,jit_bool_t);
+# define bosubr_u(i0,r0,r1) bosubr_u_p(i0,r0,r1,0)
+# define bxsubr_u(i0,r0,r1) bxsubr_u_p(i0,r0,r1,0)
+# define bosubr_u_p(i0,r0,r1,p) _bosubr_u(_jit,i0,r0,r1,1,p)
+# define bxsubr_u_p(i0,r0,r1,p) _bosubr_u(_jit,i0,r0,r1,0,p)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t); /* immediate branches: each helper serves a condition and its opposite via the first flag */
+# define blei(i0,r0,i1) blei_p(i0,r0,i1,0) /* plain forms pass p = 0 to the corresponding _p form */
+# define bgti(i0,r0,i1) bgti_p(i0,r0,i1,0)
+# define blei_p(i0,r0,i1,p) _bgti(_jit,i0,r0,i1,0,p) /* le is _bgti with flag 0 */
+# define bgti_p(i0,r0,i1,p) _bgti(_jit,i0,r0,i1,1,p) /* gt is _bgti with flag 1 */
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t);
+# define blti(i0,r0,i1) blti_p(i0,r0,i1,0)
+# define bgei(i0,r0,i1) bgei_p(i0,r0,i1,0)
+# define blti_p(i0,r0,i1,p) _bgei(_jit,i0,r0,i1,0,p)
+# define bgei_p(i0,r0,i1,p) _bgei(_jit,i0,r0,i1,1,p)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t);
+# define blei_u(i0,r0,i1) blei_u_p(i0,r0,i1,0)
+# define bgti_u(i0,r0,i1) bgti_u_p(i0,r0,i1,0)
+# define blei_u_p(i0,r0,i1,p) _bgti_u(_jit,i0,r0,i1,0,p)
+# define bgti_u_p(i0,r0,i1,p) _bgti_u(_jit,i0,r0,i1,1,p)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t);
+# define blti_u(i0,r0,i1) blti_u_p(i0,r0,i1,0)
+# define bgei_u(i0,r0,i1) bgei_u_p(i0,r0,i1,0)
+# define blti_u_p(i0,r0,i1,p) _bgei_u(_jit,i0,r0,i1,0,p)
+# define bgei_u_p(i0,r0,i1,p) _bgei_u(_jit,i0,r0,i1,1,p)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t);
+# define beqi(i0,r0,i1) beqi_p(i0,r0,i1,0)
+# define bnei(i0,r0,i1) bnei_p(i0,r0,i1,0)
+# define beqi_p(i0,r0,i1,p) _beqi(_jit,i0,r0,i1,1,p)
+# define bnei_p(i0,r0,i1,p) _beqi(_jit,i0,r0,i1,0,p)
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t);
+# define bmsi(i0,r0,i1) bmsi_p(i0,r0,i1,0)
+# define bmci(i0,r0,i1) bmci_p(i0,r0,i1,0)
+# define bmsi_p(i0,r0,i1,p) _bmsi(_jit,i0,r0,i1,0,p) /* here the helper's own name (ms) uses flag 0 and the opposite (mc) uses 1, the reverse of the helpers above */
+# define bmci_p(i0,r0,i1,p) _bmsi(_jit,i0,r0,i1,1,p)
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t); /* bo... and bx... share a helper: first flag is 1 for bo..., 0 for bx... */
+# define boaddi(i0,r0,i1) boaddi_p(i0,r0,i1,0)
+# define bxaddi(i0,r0,i1) bxaddi_p(i0,r0,i1,0)
+# define boaddi_p(i0,r0,i1,p) _boaddi(_jit,i0,r0,i1,1,p)
+# define bxaddi_p(i0,r0,i1,p) _boaddi(_jit,i0,r0,i1,0,p)
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t);
+# define boaddi_u(i0,r0,i1) boaddi_u_p(i0,r0,i1,0)
+# define bxaddi_u(i0,r0,i1) bxaddi_u_p(i0,r0,i1,0)
+# define boaddi_u_p(i0,r0,i1,p) _boaddi_u(_jit,i0,r0,i1,1,p)
+# define bxaddi_u_p(i0,r0,i1,p) _boaddi_u(_jit,i0,r0,i1,0,p)
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t);
+# define bosubi(i0,r0,i1) bosubi_p(i0,r0,i1,0)
+# define bxsubi(i0,r0,i1) bxsubi_p(i0,r0,i1,0)
+# define bosubi_p(i0,r0,i1,p) _bosubi(_jit,i0,r0,i1,1,p)
+# define bxsubi_p(i0,r0,i1,p) _bosubi(_jit,i0,r0,i1,0,p)
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_uint16_t,
+ jit_word_t,jit_bool_t,jit_bool_t);
+# define bosubi_u(i0,r0,i1) bosubi_u_p(i0,r0,i1,0)
+# define bxsubi_u(i0,r0,i1) bxsubi_u_p(i0,r0,i1,0)
+# define bosubi_u_p(i0,r0,i1,p) _bosubi_u(_jit,i0,r0,i1,1,p)
+# define bxsubi_u_p(i0,r0,i1,p) _bosubi_u(_jit,i0,r0,i1,0,p)
+/* Unconditional jumps and calls, register (r) and immediate (i) forms.
+ * The jmpi() macro always passes 0 as _jmpi's trailing boolean. */
+static void _jmpr(jit_state_t*,jit_int16_t);
+# define jmpr(r0) _jmpr(_jit,r0)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t,jit_bool_t);
+# define jmpi(i0) _jmpi(_jit,i0,0)
+static void _callr(jit_state_t*,jit_int16_t);
+# define callr(r0) _callr(_jit,r0)
+static void _calli(jit_state_t*,jit_word_t);
+# define calli(i0) _calli(_jit,i0)
+
+/* *_p variants return a jit_word_t.
+ * NOTE(review): presumably the address later handed to patch_abs() or
+ * patch_at() - confirm against the definitions. */
+static jit_word_t _movi_p(jit_state_t*,jit_uint16_t,jit_word_t);
+# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+# define jmpi_p(i0) _jmpi_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+# define calli_p(i0) _calli_p(_jit,i0)
+static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t);
+# define patch_abs(instr,label) _patch_abs(_jit,instr,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+# define patch_at(jump,label) _patch_at(_jit,jump,label)
+/* Function prolog/epilog and variadic-argument support. */
+static void _prolog(jit_state_t*,jit_node_t*);
+# define prolog(node) _prolog(_jit,node)
+static void _epilog(jit_state_t*,jit_node_t*);
+# define epilog(node) _epilog(_jit,node)
+static void _vastart(jit_state_t*, jit_int32_t);
+# define vastart(r0) _vastart(_jit, r0)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+# define vaarg(r0, r1) _vaarg(_jit, r0, r1)
+
+/* The unsuffixed load/store names are aliases of the `_i` variants. */
+# define ldr(r0,r1) ldr_i(r0,r1)
+# define ldi(r0,i0) ldi_i(r0,i0)
+# define ldxr(r0,r1,r2) ldxr_i(r0,r1,r2)
+# define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0)
+# define str(r0,r1) str_i(r0,r1)
+# define sti(i0,r0) sti_i(i0,r0)
+# define stxr(r0,r1,r2) stxr_i(r0,r1,r2)
+# define stxi(i0,r0,r1) stxi_i(i0,r0,r1)
+
+/*
+ * Bit-mask classification helpers.
+ *
+ * is_low_mask(im):    bit 0 is set and im + 1 has at most one bit set,
+ *                     i.e. im is a run of ones starting at bit 0.
+ * is_middle_mask(im): im is non-zero and adding its lowest set bit leaves
+ *                     at most one bit set, i.e. a single run of ones.
+ * is_high_mask(im):   im is non-zero and adding its lowest set bit wraps
+ *                     to zero, i.e. a run of ones ending at the top bit.
+ *
+ * The additions are done in unsigned long so that the wrap-around these
+ * tests rely on is well defined: with signed operands, 0x7fffffff + 1 and
+ * 1 << 31 are undefined behaviour on a 32-bit word.
+ */
+# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((unsigned long)(im) + 1UL) <= 1) : 0)
+# define is_middle_mask(im) ((im) ? (__builtin_popcountl((unsigned long)(im) + (1UL << __builtin_ctzl(im))) <= 1) : 0)
+# define is_high_mask(im) ((im) ? (__builtin_popcountl((unsigned long)(im) + (1UL << __builtin_ctzl(im))) == 0) : 0)
+/* Number of set bits in im, and its complement relative to the word size. */
+# define masked_bits_count(im) __builtin_popcountl(im)
+# define unmasked_bits_count(im) (__WORDSIZE - masked_bits_count(im))
+
+/* jit_sh34_p() is a compile-time constant: 1 when any of the SH3/SH4
+ * target macros tested below is defined by the compiler, 0 otherwise.
+ * The code section uses it to choose between SHAD/SHLD and software
+ * shift loops. */
+# if defined(__SH3__) || defined(__SH4__) || defined(__SH4_NOFPU__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+# define jit_sh34_p() 1
+# else
+# define jit_sh34_p() 0
+# endif
+
+/* Emit FRCHG / FSCHG, or cancel an immediately preceding identical opcode
+ * (see the definitions in the code section). */
+static void _maybe_emit_frchg(jit_state_t *_jit);
+# define maybe_emit_frchg() _maybe_emit_frchg(_jit)
+static void _maybe_emit_fschg(jit_state_t *_jit);
+# define maybe_emit_fschg() _maybe_emit_fschg(_jit)
+#endif /* PROTO */
+
+#if CODE
+/* Build an instruction word from the `ni` layout (fields c, n, i) and
+ * emit it through ii(). */
+static void
+_cni(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t n, jit_uint16_t i)
+{
+    jit_instr_t insn = { .ni = { .c = c, .n = n, .i = i } };
+
+    ii(insn.op);
+}
+
+/* Build an instruction word from the `nmd` layout (fields c, n, m, d) and
+ * emit it through ii(). */
+static void
+_cnmd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t n,
+      jit_uint16_t m, jit_uint16_t d)
+{
+    jit_instr_t insn = { .nmd = { .c = c, .n = n, .m = m, .d = d } };
+
+    ii(insn.op);
+}
+
+/* Build an instruction word from the `md` layout (fields c, m, d) and
+ * emit it through ii(). */
+static void
+_cmd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t m, jit_uint16_t d)
+{
+    jit_instr_t insn = { .md = { .c = c, .m = m, .d = d } };
+
+    ii(insn.op);
+}
+
+/* Build an instruction word from the `d` layout (fields c, d) and emit it
+ * through ii(). */
+static void
+_cd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t d)
+{
+    jit_instr_t insn = { .d = { .c = c, .d = d } };
+
+    ii(insn.op);
+}
+
+/* Emit NOPs covering i0 bytes, one NOP per two bytes. i0 must be even:
+ * the trailing assert fires if the count does not land exactly on zero. */
+static void
+_nop(jit_state_t *_jit, jit_word_t i0)
+{
+    while (i0 > 0) {
+        NOP();
+        i0 -= 2;
+    }
+    assert(i0 == 0);
+}
+
+/* Register-to-register move r0 <- r1. Emits nothing when both operands
+ * are the same register; moves from/to _GBR use STCGBR/LDCGBR instead of
+ * the plain MOV. */
+static void
+_movr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (r0 == r1)
+        return;
+
+    if (r1 == _GBR) {
+        STCGBR(r0);
+    } else if (r0 == _GBR) {
+        LDCGBR(r1);
+    } else {
+        MOV(r0, r1);
+    }
+}
+
+/*
+ * Materialise the constant i0 in r0 using only MOVI, SHLL8/SHLL16 and
+ * ADDI, recursing on the upper part of the value.
+ *
+ * The upper part is (i0 >> 8) plus one when bit 7 of i0 is set; the low
+ * byte is then added back with ADDI(r0, i0 & 0xff).
+ * NOTE(review): the +1 presumably compensates for ADDI sign-extending its
+ * 8-bit immediate - confirm against the ADDI encoding.
+ * When the low byte of that upper part is zero, a 16-bit shift is used
+ * instead of an 8-bit one to skip the empty byte.
+ */
+static void
+movi_loop(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+ jit_word_t tmp;
+
+ if (i0 >= -128 && i0 < 128) {
+ /* Fits the MOVI immediate directly. */
+ MOVI(r0, i0);
+ } else {
+ tmp = (i0 >> 8) + !!(i0 & 0x80);
+ if (tmp & 0xff) {
+ movi_loop(_jit, r0, tmp);
+ if (tmp != 0)
+ SHLL8(r0);
+ } else {
+ /* Low byte of the upper part is zero: shift by 16 in one go. */
+ tmp = (i0 >> 16) + !!(i0 & 0x80);
+ movi_loop(_jit, r0, tmp);
+ if (tmp != 0)
+ SHLL16(r0);
+ }
+ if (i0 & 0xff)
+ ADDI(r0, i0 & 0xff);
+ }
+}
+
+/* Return the number of opcodes movi_loop() would emit for the constant
+ * i0: one for a value in [-128, 127]; otherwise the count for the upper
+ * part, plus one for the shift when the upper part is non-zero, plus one
+ * for the ADDI when the low byte is non-zero. */
+static jit_word_t
+movi_loop_cnt(jit_word_t i0)
+{
+    jit_word_t upper;
+
+    if (i0 >= -128 && i0 < 128)
+        return 1;
+
+    upper = (i0 >> 8) + !!(i0 & 0x80);
+    if (!(upper & 0xff))
+        upper = (i0 >> 16) + !!(i0 & 0x80);
+
+    return movi_loop_cnt(upper) + !!upper + !!(i0 & 0xff);
+}
+
+/*
+ * Load the constant i0 into r0, trying progressively more expensive
+ * encodings: a single MOVI, MOVI plus a 1- or 2-bit left shift, a MOVA
+ * when i0 lies within 0x3ff bytes above the word-aligned current pc, an
+ * all-ones value shifted right or left for low/high bit masks, the
+ * movi_loop() sequence when it needs fewer than four opcodes, and finally
+ * load_const().
+ * The MOVA path goes through _R0 and therefore overwrites it.
+ */
+static void
+_movi(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+ /* Current emission address rounded down to a multiple of four. */
+ jit_word_t w = _jit->pc.w & ~3;
+
+ if (i0 >= -128 && i0 < 128) {
+ MOVI(r0, i0);
+ } else if (!(i0 & 0x1) && i0 >= -256 && i0 < 256) {
+ MOVI(r0, i0 >> 1);
+ SHLL(r0);
+ } else if (!(i0 & 0x3) && i0 >= -512 && i0 < 512) {
+ MOVI(r0, i0 >> 2);
+ SHLL2(r0);
+ } else if (i0 >= w && i0 <= w + 0x3ff && !((i0 - w) & 0x3)) {
+ MOVA((i0 - w) >> 2);
+ movr(r0, _R0);
+ } else if (is_low_mask(i0)) {
+ MOVI(r0, -1);
+ rshi_u(r0, r0, unmasked_bits_count(i0));
+ } else if (is_high_mask(i0)) {
+ MOVI(r0, -1);
+ lshi(r0, r0, unmasked_bits_count(i0));
+ } else if (movi_loop_cnt(i0) < 4) {
+ movi_loop(_jit, r0, i0);
+ } else {
+ load_const(0, r0, i0);
+ }
+}
+
+/*
+ * Emit a conditional branch on the T flag from address w to address i0.
+ *
+ * t_set selects BT (non-zero) or BF (zero) for the short forms. When
+ * force_patchable is set, or the displacement is below -128, the long
+ * form is used instead: the target is loaded into a scratch register, an
+ * inverted BF/BT(0) hops over the jump, and JMP + NOP performs it.
+ * NOTE(review): there is no upper bound check on disp for the short
+ * form; callers presumably only pass known targets that lie behind w -
+ * confirm.
+ */
+static void
+emit_branch_opcode(jit_state_t *_jit, jit_word_t i0, jit_word_t w,
+ int t_set, int force_patchable)
+{
+ /* Parsed as ((i0 - w) >> 1) - 2. */
+ jit_int32_t disp = (i0 - w >> 1) - 2;
+ jit_uint16_t reg;
+
+ if (!force_patchable && i0 == 0) {
+ /* Positive displacement - we don't know the target yet. */
+ if (t_set)
+ BT(0);
+ else
+ BF(0);
+
+ /* Leave space after the BF/BT in case we need to add a
+ * BRA opcode. */
+ /* w is reused here: bytes left between pc and the end of the buffer. */
+ w = _jit->code.length - (_jit->pc.uc - _jit->code.ptr);
+ if (w > 254) {
+ NOP();
+ NOP();
+ }
+ } else if (!force_patchable && disp >= -128) {
+ if (t_set)
+ BT(disp);
+ else
+ BF(disp);
+ } else {
+ reg = jit_get_reg(jit_class_gpr);
+
+ if (force_patchable)
+ movi_p(rn(reg), i0);
+ else
+ movi(rn(reg), i0);
+ /* Inverted condition: hop over the JMP when not branching. */
+ if (t_set)
+ BF(0);
+ else
+ BT(0);
+ JMP(rn(reg));
+ NOP();
+
+ jit_unget_reg(reg);
+ }
+}
+
+/* Emit FRCHG, unless _jitc->no_flag is set and the opcode emitted just
+ * before is 0xfbfd, in which case that opcode is dropped by stepping pc
+ * back one 16-bit slot.
+ * NOTE(review): 0xfbfd is presumably the FRCHG encoding, so that two
+ * consecutive toggles cancel out - confirm. */
+static void _maybe_emit_frchg(jit_state_t *_jit)
+{
+    jit_instr_t *prev = (jit_instr_t *)(_jit->pc.w - 2);
+
+    if (!_jitc->no_flag || prev->op != 0xfbfd) {
+        FRCHG();
+        return;
+    }
+
+    _jit->pc.us--;
+}
+
+/* Emit FSCHG, unless _jitc->no_flag is set and the opcode emitted just
+ * before is 0xf3fd, in which case that opcode is dropped by stepping pc
+ * back one 16-bit slot.
+ * NOTE(review): 0xf3fd is presumably the FSCHG encoding, so that two
+ * consecutive toggles cancel out - confirm. */
+static void _maybe_emit_fschg(jit_state_t *_jit)
+{
+    jit_instr_t *prev = (jit_instr_t *)(_jit->pc.w - 2);
+
+    if (!_jitc->no_flag || prev->op != 0xf3fd) {
+        FSCHG();
+        return;
+    }
+
+    _jit->pc.us--;
+}
+
+/*
+ * Emit TST(r0, r0) ahead of a conditional branch, or reuse the T flag
+ * left by the previous opcode.
+ *
+ * *set is the branch polarity the caller is about to use; it is flipped
+ * in place when the TST is skipped.
+ */
+static void maybe_emit_tst(jit_state_t *_jit, jit_uint16_t r0, jit_bool_t *set)
+{
+ /* Opcode emitted immediately before the current pc. */
+ jit_instr_t *instr = (jit_instr_t *)(_jit->pc.w - 2);
+
+ /* If the previous opcode is a MOVT(r0), we can skip the TST opcode,
+ * but we need to invert the branch condition. */
+ if (_jitc->no_flag && instr->op == (0x29 | (r0 << 8)))
+ *set ^= 1;
+ else
+ TST(r0, r0);
+}
+
+/*
+ * Conditional move: r0 <- r1, guarded by a test of r2.
+ *
+ * A T-flag test of r2 is emitted (or reused, see maybe_emit_tst()), then a
+ * branch with displacement 0 that hops over exactly one opcode, then the
+ * move itself. `set` selects which T state takes the hop.
+ */
+static void _movnr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                   jit_uint16_t r2, jit_bool_t set)
+{
+    /* movr() emits nothing when source and destination are the same
+     * register. The branch below would then hop over whatever unrelated
+     * opcode gets emitted next. The move is a no-op in that case, so
+     * emit nothing at all. */
+    if (r0 == r1)
+        return;
+
+    maybe_emit_tst(_jit, r2, &set);
+
+    /* Target 4 from base 0 gives displacement (4 >> 1) - 2 == 0. */
+    emit_branch_opcode(_jit, 4, 0, set, 0);
+    movr(r0, r1);
+}
+
+/* Single byte shared by every compare-and-swap sequence emitted by
+ * _casx(); the generated code locks it with TAS and clears it when done. */
+static char atomic_byte;
+
+/*
+ * Compare-and-swap: load the word at the address in r1 (or at the
+ * immediate address i0 when r1 is _NOREG), compare it with r2, and store
+ * r3 there on a match. r0 first receives the loaded value, then the
+ * comparison outcome through MOVT.
+ *
+ * The sequence is serialised by spinning on TAS of &atomic_byte.
+ * _R0 is overwritten by the final unlock.
+ * NOTE(review): if r0 is _R0 the MOVT result is overwritten by that
+ * unlock - callers presumably never pass _R0; confirm.
+ */
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ jit_int32_t r1_reg, iscasi, addr_reg;
+
+ /* Immediate-address form: move the address into a scratch register. */
+ if ((iscasi = (r1 == _NOREG))) {
+ r1_reg = jit_get_reg(jit_class_gpr);
+ r1 = rn(r1_reg);
+ movi(r1, i0);
+ }
+
+ addr_reg = jit_get_reg(jit_class_gpr);
+ movi(rn(addr_reg), (uintptr_t)&atomic_byte);
+
+ /* Spin: BF(-3) branches back to the TAS while T is clear. */
+ TAS(rn(addr_reg));
+ BF(-3);
+
+ LDL(r0, r1);
+ CMPEQ(r0, r2);
+ MOVT(r0);
+
+ /* Hop over the store when the comparison failed. */
+ BF(0);
+ STL(r1, r3);
+
+ /* Release the lock byte. */
+ MOVI(_R0, 0);
+ STB(rn(addr_reg), _R0);
+
+ jit_unget_reg(addr_reg);
+ if (iscasi)
+ jit_unget_reg(r1_reg);
+}
+
+/* r0 = r1 + r2. When r0 aliases r2 the other operand is added in place,
+ * otherwise r1 is copied to r0 first. */
+static void
+_addr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        ADD(r0, r2);
+        return;
+    }
+
+    ADD(r0, r1);
+}
+
+/* Add producing a carry: emit CLRT, then the same sequence as addxr(). */
+static void
+_addcr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ CLRT();
+ addxr(r0, r1, r2);
+}
+
+/* Add with carry via ADDC: r0 = r1 + r2 (+ carry). When r0 aliases r2
+ * the other operand is added in place, otherwise r1 is copied first. */
+static void
+_addxr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        ADDC(r0, r2);
+        return;
+    }
+
+    ADDC(r0, r1);
+}
+
+/*
+ * r0 = r1 + i0.
+ *
+ * Immediates in [-128, 127] use a single ADDI; the same range is passed
+ * to ADDI by _rsbi(). Larger values are first loaded into r0 when it is
+ * distinct from r1, or into _R0 otherwise, in which case r1 must not be
+ * _R0.
+ */
+static void
+_addi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    /* The upper bound used to be `i0 < 127`, which sent 127 down the
+     * multi-opcode path for no reason. */
+    if (i0 >= -128 && i0 < 128) {
+        movr(r0, r1);
+        ADDI(r0, i0);
+    } else if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r1, r0);
+    } else {
+        assert(r1 != _R0);
+
+        movi(_R0, i0);
+        addr(r0, r1, _R0);
+    }
+}
+
+/* Add-immediate producing a carry: emit CLRT, then the addxi() sequence. */
+static void
+_addci(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ CLRT();
+ addxi(r0, r1, i0);
+}
+
+/*
+ * Add-immediate with carry: load i0 into _R0 and defer to addxr().
+ * Neither r0 nor r1 may be _R0.
+ * NOTE(review): movi() can emit shift opcodes (e.g. SHLL) for some
+ * constants; if those modify the T flag, the incoming carry is lost
+ * before ADDC reads it - confirm against the instruction set manual.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r0 != _R0 && r1 != _R0);
+
+ movi(_R0, i0);
+ addxr(r0, r1, _R0);
+}
+
+/* r0 = r1 - r2. Identical operands yield the constant 0; when r0 aliases
+ * r2 the result is built as (-r2) + r1 so r2 is consumed before it is
+ * overwritten. */
+static void
+_subr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    if (r1 == r2) {
+        movi(r0, 0);
+        return;
+    }
+
+    if (r0 != r2) {
+        movr(r0, r1);
+        SUB(r0, r2);
+        return;
+    }
+
+    NEG(r0, r2);
+    ADD(r0, r1);
+}
+
+/* Subtract producing a borrow: emit CLRT, then the subxr() sequence. */
+static void
+_subcr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ CLRT();
+ subxr(r0, r1, r2);
+}
+
+/* Subtract with borrow via SUBC: r0 = r1 - r2 (- borrow). When r0
+ * aliases r2, the subtrahend is saved in a scratch register before r0 is
+ * overwritten with r1. */
+static void
+_subxr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    jit_uint32_t saved;
+
+    if (r0 == r2) {
+        saved = jit_get_reg(jit_class_gpr);
+
+        movr(rn(saved), r0);
+        movr(r0, r1);
+        SUBC(r0, rn(saved));
+
+        jit_unget_reg(saved);
+        return;
+    }
+
+    movr(r0, r1);
+    SUBC(r0, r2);
+}
+
+/* r0 = r1 - i0, emitted as an addition of the negated immediate. */
+static void
+_subi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ addi(r0, r1, -i0);
+}
+
+/* Subtract-immediate producing a borrow: load i0 into _R0 and defer to
+ * subcr(). Neither r0 nor r1 may be _R0. */
+static void
+_subci(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r0 != _R0 && r1 != _R0);
+
+ movi(_R0, i0);
+ subcr(r0, r1, _R0);
+}
+
+/*
+ * Subtract-immediate with borrow: load i0 into _R0 and defer to subxr().
+ * Neither r0 nor r1 may be _R0.
+ * NOTE(review): movi() can emit shift opcodes for some constants; if
+ * those modify the T flag, the incoming borrow is lost before SUBC reads
+ * it - confirm against the instruction set manual.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r0 != _R0 && r1 != _R0);
+
+ movi(_R0, i0);
+ subxr(r0, r1, _R0);
+}
+
+/* Reverse subtract: r0 = i0 - r1. Immediates in [-128, 127] are handled
+ * as negate-then-ADDI; larger ones are loaded into r0 (when distinct
+ * from r1) or into _R0 and subtracted with subr(). */
+static void
+_rsbi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 >= -128 && i0 <= 127) {
+        negr(r0, r1);
+        ADDI(r0, i0);
+        return;
+    }
+
+    if (r0 == r1) {
+        assert(r0 != _R0);
+
+        movi(_R0, i0);
+        subr(r0, _R0, r1);
+        return;
+    }
+
+    assert(r0 != _R0 && r1 != _R0);
+
+    movi(r0, i0);
+    subr(r0, r0, r1);
+}
+
+/* r0 = r1 * r2: MULL followed by STSL(r0) to fetch the result. */
+static void
+_mulr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ MULL(r1, r2);
+ STSL(r0);
+}
+
+/* High word of the signed product r1 * r2: DMULS followed by STSH(r0). */
+static void
+_hmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ DMULS(r1, r2);
+ STSH(r0);
+}
+
+/* Immediate form of hmulr(): i0 is loaded into _R0 first.
+ * NOTE(review): unlike _muli() there is no assert that r1 != _R0. */
+static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ movi(_R0, i0);
+ hmulr(r0, r1, _R0);
+}
+
+/* High word of the unsigned product r1 * r2: DMULU followed by STSH(r0). */
+static void
+_hmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ DMULU(r1, r2);
+ STSH(r0);
+}
+
+/* Immediate form of hmulr_u(): i0 is loaded into _R0 first.
+ * NOTE(review): unlike _muli() there is no assert that r1 != _R0. */
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ movi(_R0, i0);
+ hmulr_u(r0, r1, _R0);
+}
+
+/* Full signed product of r2 * r3: r0 receives the low word (STSL) and r1
+ * the high word (STSH). */
+static void
+_qmulr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+ DMULS(r2, r3);
+ STSL(r0);
+ STSH(r1);
+}
+
+/* Full unsigned product of r2 * r3: r0 receives the low word (STSL) and
+ * r1 the high word (STSH). */
+static void
+_qmulr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+ DMULU(r2, r3);
+ STSL(r0);
+ STSH(r1);
+}
+
+/* r0 = r1 * i0. The immediate goes through _R0, so r1 must not be _R0. */
+static void
+_muli(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ movi(_R0, i0);
+ mulr(r0, r1, _R0);
+}
+
+/* Immediate form of qmulr(). The immediate goes through _R0, so r2 must
+ * not be _R0. */
+static void
+_qmuli(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_word_t i0)
+{
+ assert(r2 != _R0);
+
+ movi(_R0, i0);
+ qmulr(r0, r1, r2, _R0);
+}
+
+/* Immediate form of qmulr_u(). The immediate goes through _R0, so r2
+ * must not be _R0. */
+static void
+_qmuli_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_word_t i0)
+{
+ assert(r2 != _R0);
+
+ movi(_R0, i0);
+ qmulr_u(r0, r1, r2, _R0);
+}
+
+/*
+ * Signed division r0 = r1 / r2, built from DIV0S/DIV1 steps.
+ *
+ * r1 and r2 must not be _R0: _R0 is used as scratch and as the loop
+ * state. When r1 and r2 are the same register the result is the constant
+ * 1 (a zero operand is not special-cased). When r0 aliases r2 the
+ * divisor is copied to a second scratch register before r0 is
+ * overwritten with the dividend.
+ * NOTE(review): the ROTCL/DIV1 loop looks like the usual SH
+ * one-bit-per-step division idiom with _R0 doubling as the iteration
+ * counter - confirm against the SH programming manual before changing
+ * any opcode or displacement.
+ */
+static void
+_divr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ jit_uint32_t reg, reg2;
+ jit_uint16_t divisor;
+
+ assert(r1 != _R0 && r2 != _R0);
+
+ if (r1 == r2) {
+ MOVI(r0, 1);
+ } else {
+ reg = jit_get_reg(jit_class_gpr);
+
+ /* Preserve the divisor if writing the dividend to r0 would clobber it. */
+ if (r0 == r2) {
+ reg2 = jit_get_reg(jit_class_gpr);
+ movr(rn(reg2), r2);
+ divisor = rn(reg2);
+ } else {
+ divisor = r2;
+ }
+
+ movr(r0, r1);
+ MOVI(_R0, 0);
+
+ /* Set-up from the sign of the dividend (T = 0 > r0). */
+ CMPGT(_R0, r0);
+ SUBC(rn(reg), rn(reg));
+ SUBC(r0, _R0);
+
+ MOVI(_R0, -2);
+ DIV0S(rn(reg), divisor);
+
+ /* Division loop: BTS(-6) branches back to the ROTCL(r0) above it,
+ * with TSTI(1) in its delay slot. */
+ ROTCL(r0);
+ DIV1(rn(reg), divisor);
+ ROTCL(_R0);
+ XORI(1);
+ BTS(-6);
+ TSTI(1);
+
+ /* Final quotient bit and correction. */
+ ROTCL(r0);
+ MOVI(_R0, 0);
+ ADDC(r0, _R0);
+
+ jit_unget_reg(reg);
+ if (r0 == r2)
+ jit_unget_reg(reg2);
+ }
+}
+
+/*
+ * Unsigned division r0 = r1 / r2, built from DIV0U/DIV1 steps.
+ *
+ * r1 and r2 must not be _R0: _R0 is used as the loop state. When r1 and
+ * r2 are the same register the result is the constant 1 (a zero operand
+ * is not special-cased). When r0 aliases r2 the divisor is copied to a
+ * second scratch register first.
+ * NOTE(review): same one-bit-per-step loop shape as _divr() - confirm
+ * against the SH programming manual before changing any opcode or
+ * displacement.
+ */
+static void
+_divr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ jit_uint32_t reg, reg2;
+ jit_uint16_t divisor;
+
+ assert(r1 != _R0 && r2 != _R0);
+
+ if (r1 == r2) {
+ MOVI(r0, 1);
+ } else {
+ reg = jit_get_reg(jit_class_gpr);
+
+ /* Preserve the divisor if writing the dividend to r0 would clobber it. */
+ if (r0 == r2) {
+ reg2 = jit_get_reg(jit_class_gpr);
+ movr(rn(reg2), r2);
+ divisor = rn(reg2);
+ } else {
+ divisor = r2;
+ }
+
+ movr(r0, r1);
+ MOVI(rn(reg), 0);
+ MOVI(_R0, -2);
+ DIV0U();
+
+ /* Division loop: BTS(-6) branches back to the ROTCL(r0) above it,
+ * with TSTI(1) in its delay slot. */
+ ROTCL(r0);
+ DIV1(rn(reg), divisor);
+ ROTCL(_R0);
+ XORI(1);
+ BTS(-6);
+ TSTI(1);
+
+ ROTCL(r0);
+
+ jit_unget_reg(reg);
+ if (r0 == r2)
+ jit_unget_reg(reg2);
+ }
+}
+
+/* Signed divide with remainder: r0 = r2 / r3, r1 = r2 - r0 * r3.
+ * r2 and r3 must not be _R0, which holds the intermediate product. When
+ * r0 aliases one of the inputs the quotient is computed in a scratch
+ * register and copied to r0 last. */
+static void
+_qdivr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+       jit_uint16_t r2, jit_uint16_t r3)
+{
+    jit_uint32_t scratch = 0;
+    jit_uint16_t quot = r0;
+    int aliased;
+
+    assert(r2 != _R0 && r3 != _R0);
+
+    aliased = (r0 == r2 || r0 == r3);
+    if (aliased) {
+        scratch = jit_get_reg(jit_class_gpr);
+        quot = rn(scratch);
+    }
+
+    divr(quot, r2, r3);
+    mulr(_R0, quot, r3);
+    subr(r1, r2, _R0);
+
+    if (aliased) {
+        movr(r0, quot);
+
+        jit_unget_reg(scratch);
+    }
+}
+
+/* Unsigned divide with remainder: r0 = r2 / r3, r1 = r2 - r0 * r3.
+ * r2 and r3 must not be _R0, which holds the intermediate product. When
+ * r0 aliases one of the inputs the quotient is computed in a scratch
+ * register and copied to r0 last. */
+static void
+_qdivr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+         jit_uint16_t r2, jit_uint16_t r3)
+{
+    jit_uint32_t scratch = 0;
+    jit_uint16_t quot = r0;
+    int aliased;
+
+    assert(r2 != _R0 && r3 != _R0);
+
+    aliased = (r0 == r2 || r0 == r3);
+    if (aliased) {
+        scratch = jit_get_reg(jit_class_gpr);
+        quot = rn(scratch);
+    }
+
+    divr_u(quot, r2, r3);
+    mulr(_R0, quot, r3);
+    subr(r1, r2, _R0);
+
+    if (aliased) {
+        movr(r0, quot);
+
+        jit_unget_reg(scratch);
+    }
+}
+
+/* r0 = r1 / i0 (signed): the immediate is loaded into a scratch register
+ * (divr() reserves _R0 for itself) and divr() does the work. */
+static void
+_divi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+ movi(rn(reg), i0);
+ divr(r0, r1, rn(reg));
+
+ jit_unget_reg(reg);
+}
+
+/* Immediate form of qdivr(): the divisor is loaded into a scratch
+ * register and qdivr() does the work. */
+static void
+_qdivi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_word_t i0)
+{
+ jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+ movi(rn(reg), i0);
+ qdivr(r0, r1, r2, rn(reg));
+
+ jit_unget_reg(reg);
+}
+
+/*
+ * Unsigned divide-by-immediate with remainder:
+ * r0 = r2 / i0, r1 = r2 - r0 * i0.
+ *
+ * When neither output aliases the dividend, the quotient is computed
+ * straight into r0. Otherwise it is computed in a scratch register so
+ * that r2 is still intact when the remainder is formed, and copied to r0
+ * afterwards.
+ */
+static void
+_qdivi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+         jit_uint16_t r2, jit_word_t i0)
+{
+    if (r0 != r2 && r1 != r2) {
+        fallback_divi_u(r0, r2, i0);
+        muli(r1, r0, i0);
+        subr(r1, r2, r1);
+    } else {
+        jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+        fallback_divi_u(rn(reg), r2, i0);
+        muli(_R0, rn(reg), i0);
+        subr(r1, r2, _R0);
+        /* This copy was missing, so r0 never received the quotient on
+         * this path. It mirrors what _qdivr() and _qdivr_u() do. */
+        movr(r0, rn(reg));
+
+        jit_unget_reg(reg);
+    }
+}
+
+/* Signed remainder r0 = r1 % r2: runs qdivr() with the quotient sent to
+ * a throw-away scratch register. r1 and r2 must not be _R0. */
+static void
+_remr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+ assert(r1 != _R0 && r2 != _R0);
+
+ qdivr(rn(reg), r0, r1, r2);
+
+ jit_unget_reg(reg);
+}
+
+/* Unsigned remainder r0 = r1 % r2: runs qdivr_u() with the quotient sent
+ * to a throw-away scratch register. r1 and r2 must not be _R0. */
+static void
+_remr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+ assert(r1 != _R0 && r2 != _R0);
+
+ qdivr_u(rn(reg), r0, r1, r2);
+
+ jit_unget_reg(reg);
+}
+
+/* Signed remainder by an immediate: the divisor is loaded into a scratch
+ * register and remr() does the work. */
+static void
+_remi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+ movi(rn(reg), i0);
+ remr(r0, r1, rn(reg));
+
+ jit_unget_reg(reg);
+}
+
+/* Unsigned remainder by an immediate: runs qdivi_u() with a scratch
+ * register in the quotient position and r0 in the remainder position. */
+static void
+_remi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+ qdivi_u(rn(reg), r0, r1, i0);
+
+ jit_unget_reg(reg);
+}
+
+/* 16-bit byte swap: EXTUW r1 into r0, then SWAPB on r0. */
+static void
+_bswapr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+ EXTUW(r0, r1);
+ SWAPB(r0, r0);
+}
+
+/* 32-bit byte swap: SWAPB, SWAPW, SWAPB in sequence. */
+static void
+_bswapr_ui(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+ SWAPB(r0, r1);
+ SWAPW(r0, r0);
+ SWAPB(r0, r0);
+}
+
+/*
+ * Rotate r1 left by the count in r2, result in r0.
+ *
+ * The count is copied to _R0 and used as a down-counter, so r0 and r1
+ * must not be _R0. The loop body runs ROTL before testing the counter,
+ * and a single ROTR follows the loop.
+ * NOTE(review): the trailing ROTR presumably undoes the one extra
+ * rotation the test-after-rotate loop performs - confirm.
+ */
+static void
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ assert(r0 != _R0 && r1 != _R0);
+
+ movr(_R0, r2);
+ movr(r0, r1);
+
+ /* BFS(-4) branches back to the ROTL; ADDI sits in its delay slot. */
+ ROTL(r0);
+ TST(_R0, _R0);
+ BFS(-4);
+ ADDI(_R0, -1);
+
+ ROTR(r0);
+}
+
+/*
+ * Rotate r1 right by the count in r2, result in r0.
+ *
+ * Mirror image of _lrotr(): the count is copied to _R0 and used as a
+ * down-counter, so r0 and r1 must not be _R0. The loop body runs ROTR
+ * before testing the counter, and a single ROTL follows the loop.
+ */
+static void
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ assert(r0 != _R0 && r1 != _R0);
+
+ movr(_R0, r2);
+ movr(r0, r1);
+
+ /* BFS(-4) branches back to the ROTR; ADDI sits in its delay slot. */
+ ROTR(r0);
+ TST(_R0, _R0);
+ BFS(-4);
+ ADDI(_R0, -1);
+
+ ROTL(r0);
+}
+
+/*
+ * Rotate r1 right by the constant i0 (0 .. __WORDSIZE - 1), result in r0.
+ *
+ * Counts below 6 are unrolled as single-bit ROTR opcodes; counts within
+ * 6 of the word size are unrolled as the complementary number of ROTL
+ * opcodes; anything else goes through the rrotr() loop with the count in
+ * _R0. r0 must not be _R0.
+ */
+static void
+_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ unsigned int i;
+
+ assert(i0 >= 0 && i0 <= __WORDSIZE - 1);
+ assert(r0 != _R0);
+
+ movr(r0, r1);
+
+ if (i0 < 6) {
+ for (i = 0; i < i0; i++)
+ ROTR(r0);
+ } else if (__WORDSIZE - i0 < 6) {
+ for (i = 0; i < __WORDSIZE - i0; i++)
+ ROTL(r0);
+ } else {
+ movi(_R0, i0);
+ rrotr(r0, r0, _R0);
+ }
+}
+
+/* r0 = r1 & r2. When r0 aliases r2 the other operand is applied in
+ * place, otherwise r1 is copied to r0 first. */
+static void
+_andr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        AND(r0, r2);
+        return;
+    }
+
+    AND(r0, r1);
+}
+
+/*
+ * r0 = r1 & i0.
+ *
+ * Three masks have dedicated sequences: 0xff and 0xffff use the
+ * extr_uc/extr_us helpers, and 0xffff0000 swaps the half-words then
+ * shifts left by 16. Other masks are loaded into r0 (when distinct from
+ * r1) or into _R0, in which case r0 must not be _R0.
+ */
+static void
+_andi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ if (i0 == 0xff) {
+ extr_uc(r0, r1);
+ } else if (i0 == 0xffff) {
+ extr_us(r0, r1);
+ } else if (i0 == 0xffff0000) {
+ SWAPW(r0, r1);
+ SHLL16(r0);
+ } else if (r0 != r1) {
+ movi(r0, i0);
+ AND(r0, r1);
+ } else {
+ assert(r0 != _R0);
+
+ movi(_R0, i0);
+ AND(r0, _R0);
+ }
+}
+
+/* r0 = r1 | r2. When r0 aliases r2 the other operand is applied in
+ * place, otherwise r1 is copied to r0 first. */
+static void
+_orr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        OR(r0, r2);
+        return;
+    }
+
+    OR(r0, r1);
+}
+
+/* r0 = r1 | i0. The immediate is loaded into r0 when it is distinct from
+ * r1, otherwise into _R0 (r0 must then not be _R0). */
+static void
+_ori(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (r0 == r1) {
+        assert(r0 != _R0);
+
+        movi(_R0, i0);
+        OR(r0, _R0);
+        return;
+    }
+
+    movi(r0, i0);
+    OR(r0, r1);
+}
+
+/* r0 = r1 ^ r2. When r0 aliases r2 the other operand is applied in
+ * place, otherwise r1 is copied to r0 first. */
+static void
+_xorr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        XOR(r0, r2);
+        return;
+    }
+
+    XOR(r0, r1);
+}
+
+/*
+ * r0 = r1 ^ i0.
+ *
+ * When the destination is _R0 and the immediate fits in 8 unsigned bits,
+ * the XORI form (which takes no register operand here) is used.
+ * Otherwise the immediate is loaded into r0 (when distinct from r1) or
+ * into _R0, in which case r0 must not be _R0.
+ */
+static void
+_xori(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ if (r0 == _R0 && !(i0 & ~0xff)) {
+ movr(r0, r1);
+ XORI(i0);
+ } else if (r0 != r1) {
+ movi(r0, i0);
+ XOR(r0, r1);
+ } else {
+ assert(r0 != _R0);
+
+ movi(_R0, i0);
+ XOR(r0, _R0);
+ }
+}
+
+/* Count leading ones of r1 into r0. The value is copied to _R0 and
+ * shifted left one bit per iteration; r0 starts at -1 and is incremented
+ * in the delay slot of the BTS on every pass, including the last. */
+static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ movr(_R0, r1);
+ movi(r0, -1);
+
+ /* BTS(-3) branches back to the SHLL while T is set. */
+ SHLL(_R0);
+ BTS(-3);
+ ADDI(r0, 1);
+}
+
+/* Count leading zeros of r1 into r0. The value is copied to _R0 and
+ * rotated left through T one bit per iteration; r0 starts at -1 and is
+ * incremented in the delay slot of the BFS on every pass.
+ * NOTE(review): the SETT presumably plants a sentinel bit so the loop
+ * terminates for a zero input - confirm. */
+static void _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ movr(_R0, r1);
+ movi(r0, -1);
+
+ SETT();
+ /* BFS(-3) branches back to the ROTCL while T is clear. */
+ ROTCL(_R0);
+ BFS(-3);
+ ADDI(r0, 1);
+}
+
+/* Count trailing ones of r1 into r0. The value is copied to _R0 and
+ * shifted right one bit per iteration; r0 starts at -1 and is
+ * incremented in the delay slot of the BTS on every pass. */
+static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ movr(_R0, r1);
+ movi(r0, -1);
+
+ /* BTS(-3) branches back to the SHLR while T is set. */
+ SHLR(_R0);
+ BTS(-3);
+ ADDI(r0, 1);
+}
+
+/* Count trailing zeros of r1 into r0. The value is copied to _R0 and
+ * rotated right through T one bit per iteration; r0 starts at -1 and is
+ * incremented in the delay slot of the BFS on every pass.
+ * NOTE(review): the SETT presumably plants a sentinel bit so the loop
+ * terminates for a zero input - confirm. */
+static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ movr(_R0, r1);
+ movi(r0, -1);
+
+ SETT();
+ /* BFS(-3) branches back to the ROTCR while T is clear. */
+ ROTCR(_R0);
+ BFS(-3);
+ ADDI(r0, 1);
+}
+
+/*
+ * Reverse the bit order of r1 into r0.
+ *
+ * The value is copied to _R0; each iteration rotates one bit out of _R0
+ * (ROTCR) and into r0 (ROTCL). The loop ends when _R0 compares equal to
+ * 1 (CMPEQI).
+ * NOTE(review): the SETT presumably injects a marker bit that reaches
+ * the value 1 after a full word of rotations - confirm.
+ */
+static void
+_rbitr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ movr(_R0, r1);
+
+ SETT();
+ ROTCR(_R0);
+ ROTCL(r0);
+ CMPEQI(1);
+ /* Target -6 from base 0: short backward BF (t_set == 0), disp -5. */
+ emit_branch_opcode(_jit, -6, 0, 0, 0);
+}
+
+/*
+ * Population count of r1 into r0 (r0 must not be _R0).
+ *
+ * The value is copied to _R0 and shifted right one bit per iteration
+ * until it becomes zero; each iteration folds the shifted-out bit into
+ * r0 with NEGC, followed by NEG in the delay slot of the BFS.
+ * NOTE(review): the NEGC/NEG pair presumably amounts to r0 += T -
+ * confirm against the instruction set manual.
+ */
+static void
+_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ assert(r0 != _R0);
+
+ movr(_R0, r1);
+ movi(r0, 0);
+
+ /* BFS(-5) branches back to the SHLR while _R0 is non-zero. */
+ SHLR(_R0);
+ NEGC(r0, r0);
+ TST(_R0, _R0);
+ BFS(-5);
+ NEG(r0, r0);
+}
+
+/* r0 = (r1 > r2), signed: CMPGT then MOVT. */
+static void
+_gtr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ CMPGT(r1, r2);
+ MOVT(r0);
+}
+
+/* r0 = (r1 > r2), unsigned: CMPHI then MOVT. */
+static void
+_gtr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ CMPHI(r1, r2);
+ MOVT(r0);
+}
+
+/* r0 = (r1 >= r2), signed: CMPGE then MOVT. */
+static void
+_ger(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ CMPGE(r1, r2);
+ MOVT(r0);
+}
+
+/* r0 = (r1 >= r2), unsigned: CMPHS then MOVT. */
+static void
+_ger_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ CMPHS(r1, r2);
+ MOVT(r0);
+}
+
+/* r0 = (r1 == r2): CMPEQ then MOVT. */
+static void
+_eqr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ CMPEQ(r1, r2);
+ MOVT(r0);
+}
+
+/* r0 = (r1 != r2). _R0 is loaded with -1 before the comparison, so r1
+ * and r2 must not be _R0; the result is formed by NEGC(r0, _R0).
+ * NOTE(review): NEGC of -1 presumably yields 1 - T, i.e. the inverted
+ * comparison - confirm. */
+static void
+_ner(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ assert(r1 != _R0 && r2 != _R0);
+
+ MOVI(_R0, -1);
+ CMPEQ(r1, r2);
+ NEGC(r0, _R0);
+}
+
+/*
+ * r0 = (r1 == i0).
+ *
+ * Zero is tested with TST(r1, r1); immediates in [-128, 127] copy r1 to
+ * _R0 and use CMPEQI; anything else loads i0 into _R0 and uses CMPEQ.
+ * The two non-zero paths go through _R0, so r1 must not be _R0 there.
+ */
+static void
+_eqi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ if (i0 == 0) {
+ TST(r1, r1);
+ } else if (i0 >= -128 && i0 < 128) {
+ assert(r1 != _R0);
+
+ movr(_R0, r1);
+ CMPEQI(i0);
+ } else {
+ assert(r1 != _R0);
+
+ movi(_R0, i0);
+ CMPEQ(r1, _R0);
+ }
+ MOVT(r0);
+}
+
+/*
+ * r0 = (r1 != i0).
+ *
+ * The equality test is emitted exactly as in _eqi(); the result is then
+ * formed by loading -1 into _R0 and emitting NEGC(r0, _R0). Neither r0
+ * nor r1 may be _R0.
+ */
+static void
+_nei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r0 != _R0 && r1 != _R0);
+
+ if (i0 == 0) {
+ TST(r1, r1);
+ } else if (i0 >= -128 && i0 < 128) {
+ movr(_R0, r1);
+ CMPEQI(i0);
+ } else {
+ movi(_R0, i0);
+ CMPEQ(r1, _R0);
+ }
+
+ MOVI(_R0, -1);
+ NEGC(r0, _R0);
+}
+
+/* r0 = (r1 > i0), signed. Zero uses the single-operand CMPPL; any other
+ * immediate is loaded into _R0 (so r1 must not be _R0) and compared with
+ * CMPGT. */
+static void
+_gti(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 0) {
+        assert(r1 != _R0);
+
+        movi(_R0, i0);
+        CMPGT(r1, _R0);
+    } else {
+        CMPPL(r1);
+    }
+
+    MOVT(r0);
+}
+
+/* r0 = (r1 >= i0), signed. Zero uses the single-operand CMPPZ; any other
+ * immediate is loaded into _R0 (so r1 must not be _R0) and compared with
+ * CMPGE. */
+static void
+_gei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 0) {
+        assert(r1 != _R0);
+
+        movi(_R0, i0);
+        CMPGE(r1, _R0);
+    } else {
+        CMPPZ(r1);
+    }
+
+    MOVT(r0);
+}
+
+/* r0 = (r1 > i0), unsigned. i0 goes through _R0, so r1 must not be _R0. */
+static void
+_gti_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ movi(_R0, i0);
+ CMPHI(r1, _R0);
+ MOVT(r0);
+}
+
+/* r0 = (r1 >= i0), unsigned. i0 goes through _R0, so r1 must not be _R0. */
+static void
+_gei_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ movi(_R0, i0);
+ CMPHS(r1, _R0);
+ MOVT(r0);
+}
+
+/* r0 = (r1 < i0), signed; r1 must not be _R0. For zero, r1 is copied to
+ * r0 and rotated left through T with ROTCL before MOVT. Any other
+ * immediate is loaded into _R0 and compared with CMPGT using swapped
+ * operands. */
+static void
+_lti(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    assert(r1 != _R0);
+
+    if (i0 != 0) {
+        movi(_R0, i0);
+        CMPGT(_R0, r1);
+    } else {
+        movr(r0, r1);
+        ROTCL(r0);
+    }
+
+    MOVT(r0);
+}
+
+/* r0 = (r1 <= i0), signed: CMPGE with swapped operands. i0 goes through
+ * _R0, so r1 must not be _R0. */
+static void
+_lei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ movi(_R0, i0);
+ CMPGE(_R0, r1);
+ MOVT(r0);
+}
+
+/* r0 = (r1 < i0), unsigned: CMPHI with swapped operands. i0 goes through
+ * _R0, so r1 must not be _R0. */
+static void
+_lti_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ movi(_R0, i0);
+ CMPHI(_R0, r1);
+ MOVT(r0);
+}
+
+/* r0 = (r1 <= i0), unsigned: CMPHS with swapped operands. i0 goes
+ * through _R0, so r1 must not be _R0. */
+static void
+_lei_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ movi(_R0, i0);
+ CMPHS(_R0, r1);
+ MOVT(r0);
+}
+
+/*
+ * Shift r0 left in place by the count held in r1.
+ *
+ * With jit_sh34_p() a single SHLD is emitted. Otherwise the count is
+ * copied to _R0 and a software loop shifts one bit per iteration with
+ * SHLL, using DT on _R0 as the loop counter; the initial TST/BTS pair
+ * deals with a zero count.
+ */
+static void
+emit_shllr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+ if (jit_sh34_p())
+ SHLD(r0, r1);
+ else {
+ movr(_R0, r1);
+
+ TST(_R0, _R0);
+ BTS(2);
+ DT(_R0);
+ /* BFS(-3) branches back to the DT; SHLL sits in its delay slot. */
+ BFS(-3);
+ SHLL(r0);
+ }
+}
+
+/* r0 = r1 << r2. When r0 aliases the count register, the count is saved
+ * in _R0 before r0 is overwritten with r1 (r1 must then not be _R0). */
+static void
+_lshr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        emit_shllr(_jit, r0, r2);
+        return;
+    }
+
+    assert(r1 != _R0);
+
+    movr(_R0, r2);
+    movr(r0, r1);
+    emit_shllr(_jit, r0, _R0);
+}
+
+/*
+ * Arithmetic right shift r0 = r1 >> r2; r0 and r1 must not be _R0.
+ *
+ * With jit_sh34_p() the count is negated into _R0 and a single SHAD is
+ * emitted. Otherwise the count is copied to _R0 and a software loop
+ * shifts one bit per iteration with SHAR, using DT on _R0 as counter.
+ */
+static void
+_rshr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ assert(r0 != _R0 && r1 != _R0);
+
+ if (jit_sh34_p()) {
+ negr(_R0, r2);
+ movr(r0, r1);
+ SHAD(r0, _R0);
+ } else {
+ movr(_R0, r2);
+ movr(r0, r1);
+
+ TST(_R0, _R0);
+ BTS(2);
+ DT(_R0);
+ /* BFS(-3) branches back to the DT; SHAR sits in its delay slot. */
+ BFS(-3);
+ SHAR(r0);
+ }
+}
+
+/*
+ * Logical right shift r0 = r1 >> r2; r0 and r1 must not be _R0.
+ *
+ * With jit_sh34_p() the count is negated into _R0 and a single SHLD is
+ * emitted. Otherwise the count is copied to _R0 and a software loop
+ * shifts one bit per iteration with SHLR, using DT on _R0 as counter.
+ */
+static void
+_rshr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ assert(r0 != _R0 && r1 != _R0);
+
+ if (jit_sh34_p()) {
+ negr(_R0, r2);
+ movr(r0, r1);
+ SHLD(r0, _R0);
+ } else {
+ movr(_R0, r2);
+ movr(r0, r1);
+
+ TST(_R0, _R0);
+ BTS(2);
+ DT(_R0);
+ /* BFS(-3) branches back to the DT; SHLR sits in its delay slot. */
+ BFS(-3);
+ SHLR(r0);
+ }
+}
+
+/*
+ * r0 = r1 << i0 for a constant count.
+ *
+ * A zero count is just the move. A count of 4 is two SHLL2. The bitmap
+ * `mask` has bit (i0 - 1) set for the counts 1, 2, 3, 8, 9, 10, 16, 17,
+ * 18 and 24, which are composed from SHLL16/SHLL8/SHLL2/SHLL. Every
+ * other count is loaded into a register (_R0 unless r0 is _R0) and
+ * handed to lshr().
+ * NOTE(review): `1 << (i0 - 1)` assumes 1 <= i0 <= 31 - confirm callers
+ * never pass a count outside that range.
+ */
+static void
+_lshi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ jit_uint32_t reg, mask = 0x00838387;
+
+ movr(r0, r1);
+
+ if (i0 == 0)
+ return;
+
+ if (i0 == 4) {
+ SHLL2(r0);
+ SHLL2(r0);
+ } else if (mask & (1 << (i0 - 1))) {
+ if (i0 & 0x10)
+ SHLL16(r0);
+ if (i0 & 0x8)
+ SHLL8(r0);
+ if (i0 & 0x2)
+ SHLL2(r0);
+ if (i0 & 0x1)
+ SHLL(r0);
+ } else {
+ /* Only allocate a scratch register when _R0 is the destination. */
+ reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr);
+
+ movi(rn(reg), i0);
+ lshr(r0, r0, rn(reg));
+
+ if (r0 == _R0)
+ jit_unget_reg(reg);
+ }
+}
+
+/*
+ * Arithmetic right shift r0 = r1 >> i0 for a constant count.
+ *
+ * The count register is _R0 unless r0 is _R0, in which case a scratch
+ * register is allocated. With jit_sh34_p() the negated count is loaded
+ * and a single SHAD emitted; otherwise a DT-counted loop of SHAR is
+ * used, which requires i0 > 0.
+ */
+static void
+_rshi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ jit_uint32_t reg;
+
+ reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr);
+
+ movr(r0, r1);
+ if (jit_sh34_p()) {
+ movi(rn(reg), -i0);
+ SHAD(r0, rn(reg));
+ } else {
+ assert(i0 > 0);
+ movi(rn(reg), i0);
+ DT(rn(reg));
+ /* BFS(-3) branches back to the DT; SHAR sits in its delay slot. */
+ BFS(-3);
+ SHAR(r0);
+ }
+
+ if (r0 == _R0)
+ jit_unget_reg(reg);
+}
+
+/*
+ * Logical right shift r0 = r1 >> i0 for a constant count.
+ *
+ * Mirrors _lshi(): a zero count is just the move, 4 is two SHLR2, and
+ * the counts flagged in `mask` (1, 2, 3, 8, 9, 10, 16, 17, 18, 24) are
+ * composed from SHLR16/SHLR8/SHLR2/SHLR. Every other count uses SHLD
+ * with the negated count when jit_sh34_p(), or a DT-counted loop of
+ * SHLR otherwise.
+ * NOTE(review): `1 << (i0 - 1)` assumes 1 <= i0 <= 31 - confirm callers
+ * never pass a count outside that range.
+ */
+static void
+_rshi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ jit_uint32_t reg, mask = 0x00838387;
+
+ movr(r0, r1);
+
+ if (i0 == 0)
+ return;
+
+ if (i0 == 4) {
+ SHLR2(r0);
+ SHLR2(r0);
+ } else if (mask & (1 << (i0 - 1))) {
+ if (i0 & 0x10)
+ SHLR16(r0);
+ if (i0 & 0x8)
+ SHLR8(r0);
+ if (i0 & 0x2)
+ SHLR2(r0);
+ if (i0 & 0x1)
+ SHLR(r0);
+ } else {
+ /* Only allocate a scratch register when _R0 is the destination. */
+ reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr);
+
+ if (jit_sh34_p()) {
+ movi(rn(reg), -i0);
+ SHLD(r0, rn(reg));
+ } else {
+ movi(rn(reg), i0);
+ DT(rn(reg));
+ /* BFS(-3) branches back to the DT; SHLR sits in its delay slot. */
+ BFS(-3);
+ SHLR(r0);
+ }
+
+ if (r0 == _R0)
+ jit_unget_reg(reg);
+ }
+}
+
+/*
+ * Double-word left shift (SHAD variant): r0 receives r2 shifted left by
+ * r3, r1 receives r2 shifted by (r3 - __WORDSIZE). r0 and r1 must be
+ * distinct.
+ *
+ * The count lives in _R0. A count of exactly 32 is special-cased: the
+ * BF(0) hops over the XOR unless _R0 == 32, so r0 is cleared only in
+ * that case.
+ * NOTE(review): the second SHAD relies on a negative count shifting
+ * right - confirm against the instruction set manual.
+ */
+static void
+_qlshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3)
+{
+ assert(r0 != r1);
+ movr(_R0, r3);
+ movr(r0, r2);
+ CMPEQI(32);
+ movr(r1, r2);
+ BF(0);
+ XOR(r0, r0);
+ SHAD(r0, _R0);
+ ADDI(_R0, -__WORDSIZE);
+ SHAD(r1, _R0);
+}
+
+/*
+ * Double-word left shift (SHLD variant): r0 receives r2 shifted left by
+ * r3, r1 receives r2 shifted by (r3 - __WORDSIZE). r0 and r1 must be
+ * distinct.
+ *
+ * Same structure as _qlshr() with SHLD in place of SHAD: the BF(0) hops
+ * over the XOR unless the count in _R0 equals 32.
+ */
+static void
+_qlshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3)
+{
+ assert(r0 != r1);
+ movr(_R0, r3);
+ movr(r0, r2);
+ CMPEQI(32);
+ movr(r1, r2);
+ BF(0);
+ XOR(r0, r0);
+ SHLD(r0, _R0);
+ ADDI(_R0, -__WORDSIZE);
+ SHLD(r1, _R0);
+}
+
+/*
+ * Double-word left shift by a constant: r0 = low word of (r2 << i0),
+ * r1 = the bits shifted out of the top. `sign` selects an arithmetic
+ * (rshi) or logical (rshi_u) shift for the upper word.
+ *
+ * i0 == 0 and i0 == __WORDSIZE are special-cased; the assert rejects
+ * counts above __WORDSIZE.
+ * NOTE(review): in the general case r1 is written before r2 is read
+ * again for r0, so r1 == r2 looks unsafe - confirm callers never alias
+ * them.
+ */
+static void
+_xlshi(jit_state_t *_jit, jit_bool_t sign,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+ if (i0 == 0) {
+ movr(r0, r2);
+ /* Upper word: sign fill or zero. */
+ if (sign)
+ rshi(r1, r2, __WORDSIZE - 1);
+ else
+ movi(r1, 0);
+ }
+ else if (i0 == __WORDSIZE) {
+ movr(r1, r2);
+ movi(r0, 0);
+ }
+ else {
+ assert((jit_uword_t)i0 <= __WORDSIZE);
+ if (sign)
+ rshi(r1, r2, __WORDSIZE - i0);
+ else
+ rshi_u(r1, r2, __WORDSIZE - i0);
+ lshi(r0, r2, i0);
+ }
+}
+
+/*
+ * Double-word right shift (SHAD variant): r0 receives r2 shifted right
+ * by r3, r1 receives r2 shifted by (__WORDSIZE - r3). r0 and r1 must be
+ * distinct.
+ *
+ * The negated count lives in _R0. A zero count is special-cased: the
+ * BF(0) hops over the MOV unless _R0 == 0, so r1 is overwritten with
+ * _R0 (zero) only in that case.
+ */
+static void
+_qrshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3)
+{
+ assert(r0 != r1);
+ NEG(_R0, r3);
+ movr(r1, r2);
+ CMPEQI(0);
+ movr(r0, r2);
+ BF(0);
+ MOV(r1, _R0);
+ SHAD(r0, _R0);
+ ADDI(_R0, __WORDSIZE);
+ SHAD(r1, _R0);
+}
+
+/*
+ * Double-word right shift (SHLD variant): r0 receives r2 shifted right
+ * by r3, r1 receives r2 shifted by (__WORDSIZE - r3). r0 and r1 must be
+ * distinct.
+ *
+ * Same structure as _qrshr() with SHLD in place of SHAD: the BF(0) hops
+ * over the MOV unless the negated count in _R0 equals 0.
+ */
+static void
+_qrshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3)
+{
+ assert(r0 != r1);
+ NEG(_R0, r3);
+ movr(r1, r2);
+ CMPEQI(0);
+ movr(r0, r2);
+ BF(0);
+ MOV(r1, _R0);
+ SHLD(r0, _R0);
+ ADDI(_R0, __WORDSIZE);
+ SHLD(r1, _R0);
+}
+
+/*
+ * Double-word right shift by a constant: r0 = r2 >> i0, r1 = the bits
+ * shifted out of the bottom, left-aligned. `sign` selects an arithmetic
+ * (rshi) or logical (rshi_u) shift for r0.
+ *
+ * i0 == 0 and i0 == __WORDSIZE are special-cased; the assert rejects
+ * counts above __WORDSIZE.
+ * NOTE(review): in the general case r1 is written before r2 is read
+ * again for r0, so r1 == r2 looks unsafe - confirm callers never alias
+ * them.
+ */
+static void
+_xrshi(jit_state_t *_jit, jit_bool_t sign,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+ if (i0 == 0) {
+ movr(r0, r2);
+ movi(r1, 0);
+ }
+ else if (i0 == __WORDSIZE) {
+ movr(r1, r2);
+ /* Result word: sign fill or zero. */
+ if (sign)
+ rshi(r0, r2, __WORDSIZE - 1);
+ else
+ movi(r0, 0);
+ }
+ else {
+ assert((jit_uword_t)i0 <= __WORDSIZE);
+ lshi(r1, r2, __WORDSIZE - i0);
+ if (sign)
+ rshi(r0, r2, i0);
+ else
+ rshi_u(r0, r2, i0);
+ }
+}
+
+/* Unsigned byte load from the address in r1: ldr_c() followed by
+ * extr_uc() on the result. */
+static void _ldr_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+ ldr_c(r0, r1);
+ extr_uc(r0, r0);
+}
+
+/* Unsigned 16-bit load from the address in r1: ldr_s() followed by
+ * extr_us() on the result. */
+static void _ldr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+ ldr_s(r0, r1);
+ extr_us(r0, r0);
+}
+
+/* Byte load from the absolute address i0; the address goes through _R0. */
+static void _ldi_c(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+ movi(_R0, i0);
+ ldr_c(r0, _R0);
+}
+
+/* 16-bit load from the absolute address i0; the address goes through
+ * _R0. */
+static void _ldi_s(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+ movi(_R0, i0);
+ ldr_s(r0, _R0);
+}
+
+/* 32-bit load from the absolute address i0; the address goes through
+ * _R0. */
+static void _ldi_i(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+ movi(_R0, i0);
+ ldr_i(r0, _R0);
+}
+
+/* Unsigned byte load from the absolute address i0; the address goes
+ * through _R0. */
+static void _ldi_uc(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+ movi(_R0, i0);
+ ldr_uc(r0, _R0);
+}
+
+/* Unsigned 16-bit load from the absolute address i0; the address goes
+ * through _R0. */
+static void _ldi_us(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+ movi(_R0, i0);
+ ldr_us(r0, _R0);
+}
+
+/* Indexed byte load: r2 is copied to _R0 and LDRB(r0, r1) is emitted, so
+ * r1 must not be _R0.
+ * NOTE(review): LDRB presumably addresses @(R0, r1) - confirm. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ assert(r1 != _R0);
+
+ movr(_R0, r2);
+ LDRB(r0, r1);
+}
+
+/* Indexed 16-bit load: r2 is copied to _R0 and LDRW(r0, r1) is emitted,
+ * so r1 must not be _R0. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ assert(r1 != _R0);
+
+ movr(_R0, r2);
+ LDRW(r0, r1);
+}
+
+/* Indexed 32-bit load: r2 is copied to _R0 and LDRL(r0, r1) is emitted,
+ * so r1 must not be _R0. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ assert(r1 != _R0);
+
+ movr(_R0, r2);
+ LDRL(r0, r1);
+}
+
+/* Unsigned indexed byte load: ldxr_c() followed by extr_uc(). */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ ldxr_c(r0, r1, r2);
+ extr_uc(r0, r0);
+}
+
+/* Unsigned indexed 16-bit load: ldxr_s() followed by extr_us(). */
+static void
+_ldxr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+ ldxr_s(r0, r1, r2);
+ extr_us(r0, r0);
+}
+
+/*
+ * Byte load from r1 + i0 (r1 must not be _R0).
+ *
+ * _GBR base: offsets 0..0xff use GBRLDB, whose result is then moved out
+ * of _R0; other offsets copy _GBR into r0 and retry with r0 as base.
+ * Ordinary base: offsets 0..0xf use LDDB (result also moved out of _R0);
+ * anything else loads the offset into _R0 and uses ldxr_c().
+ * NOTE(review): the _GBR retry path recurses with r1 == r0, which trips
+ * the assert if r0 is _R0 - confirm that combination cannot occur.
+ */
+static void
+_ldxi_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ if (r1 == _GBR) {
+ if (i0 >= 0 && i0 <= 0xff) {
+ GBRLDB(i0);
+ movr(r0, _R0);
+ } else {
+ movr(r0, r1);
+ ldxi_c(r0, r0, i0);
+ }
+ } else if (i0 >= 0 && i0 <= 0xf) {
+ LDDB(r1, i0);
+ movr(r0, _R0);
+ } else {
+ movi(_R0, i0);
+ ldxr_c(r0, r1, _R0);
+ }
+}
+
+/*
+ * 16-bit load from r1 + i0 (r1 must not be _R0).
+ *
+ * _GBR base: even offsets 0..0x1ff use GBRLDW with the offset halved,
+ * result moved out of _R0; other offsets copy _GBR into r0 and retry.
+ * Ordinary base: even offsets 0..0x1f use LDDW with the offset halved;
+ * anything else loads the offset into _R0 and uses ldxr_s().
+ */
+static void
+_ldxi_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ if (r1 == _GBR) {
+ if (i0 >= 0 && i0 <= 0x1ff && !(i0 & 0x1)) {
+ GBRLDW(i0 >> 1);
+ movr(r0, _R0);
+ } else {
+ movr(r0, r1);
+ ldxi_s(r0, r0, i0);
+ }
+ } else if (i0 >= 0 && i0 <= 0x1f && !(i0 & 0x1)) {
+ LDDW(r1, i0 >> 1);
+ movr(r0, _R0);
+ } else {
+ movi(_R0, i0);
+ ldxr_s(r0, r1, _R0);
+ }
+}
+
+/*
+ * 32-bit load from r1 + i0 (r1 must not be _R0).
+ *
+ * _GBR base: offsets 0..0x3ff that are multiples of 4 use GBRLDL with
+ * the offset divided by 4, result moved out of _R0; other offsets copy
+ * _GBR into r0 and retry. Ordinary base: offsets 0..0x3f that are
+ * multiples of 4 use LDDL, which takes the destination register
+ * directly; anything else loads the offset into _R0 and uses ldxr_i().
+ */
+static void
+_ldxi_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ if (r1 == _GBR) {
+ if (i0 >= 0 && i0 <= 0x3ff && !(i0 & 0x3)) {
+ GBRLDL(i0 >> 2);
+ movr(r0, _R0);
+ } else {
+ movr(r0, r1);
+ ldxi_i(r0, r0, i0);
+ }
+ } else if (i0 >= 0 && i0 <= 0x3f && !(i0 & 0x3)) {
+ LDDL(r0, r1, i0 >> 2);
+ } else {
+ movi(_R0, i0);
+ ldxr_i(r0, r1, _R0);
+ }
+}
+
+/* Unsigned byte load from r1 + i0: the signed load targets _R0 and
+ * extr_uc() writes the result to r0. r1 must not be _R0. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ ldxi_c(_R0, r1, i0);
+ extr_uc(r0, _R0);
+}
+
+/* Unsigned 16-bit load from r1 + i0: the signed load targets _R0 and
+ * extr_us() writes the result to r0. r1 must not be _R0. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+ assert(r1 != _R0);
+
+ ldxi_s(_R0, r1, i0);
+ extr_us(r0, _R0);
+}
+
+/* Byte load with address update by i0. An increment of exactly 1 uses
+ * the LDBU opcode; every other increment falls back to
+ * generic_ldxai_c(). */
+static void
+_ldxai_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 1) {
+        generic_ldxai_c(r0, r1, i0);
+        return;
+    }
+
+    LDBU(r0, r1);
+}
+
+/* Unsigned byte load with address update by i0: same load as
+ * _ldxai_c(), followed by extr_uc() on the result. */
+static void
+_ldxai_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 1) {
+        generic_ldxai_c(r0, r1, i0);
+    } else {
+        LDBU(r0, r1);
+    }
+
+    extr_uc(r0, r0);
+}
+
+/* 16-bit load with address update by i0. An increment of exactly 2 uses
+ * the LDWU opcode; every other increment falls back to
+ * generic_ldxai_s(). */
+static void
+_ldxai_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 2) {
+        generic_ldxai_s(r0, r1, i0);
+        return;
+    }
+
+    LDWU(r0, r1);
+}
+
+/* Unsigned 16-bit load with address update by i0: same load as
+ * _ldxai_s(), followed by extr_us() on the result. */
+static void
+_ldxai_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 2) {
+        generic_ldxai_s(r0, r1, i0);
+    } else {
+        LDWU(r0, r1);
+    }
+
+    extr_us(r0, r0);
+}
+
+/* 32-bit load with address update by i0. An increment of exactly 4 uses
+ * the LDLU opcode; every other increment falls back to
+ * generic_ldxai_i(). */
+static void
+_ldxai_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 4) {
+        generic_ldxai_i(r0, r1, i0);
+        return;
+    }
+
+    LDLU(r0, r1);
+}
+
+/* Byte store of r0 to the absolute address i0. The address goes through
+ * _R0, so the value register must not be _R0. */
+static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+ assert(r0 != _R0);
+
+ movi(_R0, i0);
+ str_c(_R0, r0);
+}
+
+/* 16-bit store of r0 to the absolute address i0. The address goes
+ * through _R0, so the value register must not be _R0. */
+static void _sti_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+ assert(r0 != _R0);
+
+ movi(_R0, i0);
+ str_s(_R0, r0);
+}
+
+/*
+ * Emit a 32-bit store of r0 to the absolute address i0. The address is
+ * materialized in R0, so the value register must not be R0.
+ */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+    assert(r0 != _R0);
+
+    /* R0 <- address, then store through it */
+    movi(_R0, i0);
+    str_i(_R0, r0);
+}
+
+/*
+ * Emit an indexed byte store: r0 is copied to R0 and STRB is issued with
+ * r1 and r2. Neither r1 nor r2 may be R0, since R0 is overwritten with
+ * the index first.
+ */
+static void
+_stxr_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0 && r2 != _R0);
+
+    /* The index travels in R0 */
+    movr(_R0, r0);
+    STRB(r1, r2);
+}
+
+/*
+ * Emit an indexed 16-bit store: r0 is copied to R0 and STRW is issued
+ * with r1 and r2. Neither r1 nor r2 may be R0, since R0 is overwritten
+ * with the index first.
+ */
+static void
+_stxr_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0 && r2 != _R0);
+
+    /* The index travels in R0 */
+    movr(_R0, r0);
+    STRW(r1, r2);
+}
+
+/*
+ * Emit an indexed 32-bit store: r0 is copied to R0 and STRL is issued
+ * with r1 and r2. Neither r1 nor r2 may be R0, since R0 is overwritten
+ * with the index first.
+ */
+static void
+_stxr_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0 && r2 != _R0);
+
+    /* The index travels in R0 */
+    movr(_R0, r0);
+    STRL(r1, r2);
+}
+
+/*
+ * Emit a byte store of r1 at (r0 + i0).
+ *
+ * Regular base: the offset is loaded into R0 and the indexed store is
+ * used, so neither r0 nor r1 may be R0.
+ * GBR base: an offset in [0, 0xff] uses the GBR-relative store, with the
+ * value copied to R0 first; any other offset copies GBR into a temporary
+ * register and stores relative to that.
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    jit_uint32_t tmp;
+
+    if (r0 != _GBR) {
+        assert(r0 != _R0 && r1 != _R0);
+
+        movi(_R0, i0);
+        stxr_c(_R0, r0, r1);
+        return;
+    }
+
+    if (i0 < 0 || i0 > 0xff) {
+        /* Offset not encodable relative to GBR */
+        tmp = jit_get_reg(jit_class_gpr);
+        movr(rn(tmp), r0);
+        stxi_c(i0, rn(tmp), r1);
+        jit_unget_reg(tmp);
+        return;
+    }
+
+    movr(_R0, r1);
+    GBRSTB(i0);
+}
+
+/*
+ * Emit a 16-bit store of r1 at (r0 + i0).
+ *
+ * Regular base: the offset is loaded into R0 and the indexed store is
+ * used, so neither r0 nor r1 may be R0.
+ * GBR base: an even offset in [0, 0x1ff] uses the GBR-relative store
+ * (offset encoded in halfwords), with the value copied to R0 first; any
+ * other offset copies GBR into a temporary register and stores relative
+ * to that.
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    jit_uint32_t tmp;
+
+    if (r0 != _GBR) {
+        assert(r0 != _R0 && r1 != _R0);
+
+        movi(_R0, i0);
+        stxr_s(_R0, r0, r1);
+        return;
+    }
+
+    if (i0 >= 0 && i0 <= 0x1ff && !(i0 & 0x1)) {
+        movr(_R0, r1);
+        GBRSTW(i0 >> 1);
+        return;
+    }
+
+    /* Offset not encodable relative to GBR */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    stxi_s(i0, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a 32-bit store of r1 at (r0 + i0).
+ *
+ * Regular base: a multiple of 4 in [0, 0x3f] is encoded directly as a
+ * word displacement; otherwise the offset is loaded into R0 and the
+ * indexed store is used, in which case neither r0 nor r1 may be R0.
+ * GBR base: a multiple of 4 in [0, 0x3ff] uses the GBR-relative store
+ * (offset encoded in words), with the value copied to R0 first; any other
+ * offset copies GBR into a temporary register and stores relative to
+ * that.
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    jit_uint32_t tmp;
+
+    if (r0 != _GBR) {
+        if (i0 >= 0 && i0 <= 0x3f && !(i0 & 3)) {
+            STDL(r0, r1, i0 >> 2);
+            return;
+        }
+
+        assert(r0 != _R0 && r1 != _R0);
+
+        movi(_R0, i0);
+        stxr_i(_R0, r0, r1);
+        return;
+    }
+
+    if (i0 >= 0 && i0 <= 0x3ff && !(i0 & 0x3)) {
+        movr(_R0, r1);
+        GBRSTL(i0 >> 2);
+        return;
+    }
+
+    /* Offset not encodable relative to GBR */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    stxi_i(i0, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a byte store of r1 through r0 with the pointer adjusted by i0
+ * beforehand. An adjustment of exactly -1 maps to STBU (presumably the
+ * native pre-decrement byte store -- confirm against its definition);
+ * any other value goes through the generic implementation.
+ */
+static void
+_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (i0 != -1) {
+        generic_stxbi_c(i0, r0, r1);
+        return;
+    }
+
+    STBU(r0, r1);
+}
+
+/*
+ * Emit a 16-bit store of r1 through r0 with the pointer adjusted by i0
+ * beforehand. An adjustment of exactly -2 maps to STWU (presumably the
+ * native pre-decrement halfword store -- confirm against its
+ * definition); any other value goes through the generic implementation.
+ */
+static void
+_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (i0 != -2) {
+        generic_stxbi_s(i0, r0, r1);
+        return;
+    }
+
+    STWU(r0, r1);
+}
+
+/*
+ * Emit a 32-bit store of r1 through r0 with the pointer adjusted by i0
+ * beforehand. An adjustment of exactly -4 maps to STLU (presumably the
+ * native pre-decrement word store -- confirm against its definition);
+ * any other value goes through the generic implementation.
+ */
+static void
+_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (i0 != -4) {
+        generic_stxbi_i(i0, r0, r1);
+        return;
+    }
+
+    STLU(r0, r1);
+}
+
+/*
+ * Emit a CMPGE comparison of r0 and r1 followed by a conditional branch
+ * to i0. 't' and 'p' are forwarded unchanged to emit_branch_opcode().
+ * Returns the code address of the branch, i.e. the position right after
+ * the comparison.
+ */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t t, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+    CMPGE(r0, r1);
+
+    /* Record where the branch opcode starts so it can be patched */
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, t, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit a CMPHS comparison of r0 and r1 followed by a conditional branch
+ * to i0. 't' and 'p' are forwarded unchanged to emit_branch_opcode().
+ * Returns the code address of the branch, i.e. the position right after
+ * the comparison.
+ */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_uint16_t r1, jit_bool_t t, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+    CMPHS(r0, r1);
+
+    /* Record where the branch opcode starts so it can be patched */
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, t, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit a branch to i0 taken when r0 equals r1.
+ *
+ * When both operands are the same register the comparison is skipped and
+ * an unconditional jump is emitted instead: jmpi_p() if 'p' is set,
+ * otherwise _jmpi() with its 'force' argument set when i0 is 0.
+ * Returns the value produced by the jump helper, or the code address of
+ * the conditional branch.
+ */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (r0 != r1) {
+        CMPEQ(r0, r1);
+        branch_at = _jit->pc.w;
+        emit_branch_opcode(_jit, i0, branch_at, 1, p);
+        return branch_at;
+    }
+
+    /* A register always equals itself */
+    if (p)
+        return jmpi_p(i0);
+
+    return _jmpi(_jit, i0, i0 == 0);
+}
+
+/*
+ * Emit a CMPEQ comparison of r0 and r1 followed by a conditional branch
+ * to i0, requesting the branch polarity 0 from emit_branch_opcode().
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+    CMPEQ(r0, r1);
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, 0, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit a bit test of r0 against r1 followed by a conditional branch to
+ * i0, with an initial branch polarity of 0.
+ *
+ * Distinct registers are tested with TST. When both operands are the
+ * same register the test is delegated to maybe_emit_tst(), which
+ * receives a pointer to the polarity and may update it.
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+    jit_bool_t polarity = 0;
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (r0 == r1) {
+        maybe_emit_tst(_jit, r0, &polarity);
+    } else {
+        TST(r0, r1);
+    }
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, polarity, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit a bit test of r0 against r1 followed by a conditional branch to
+ * i0, with an initial branch polarity of 1.
+ *
+ * Distinct registers are tested with TST. When both operands are the
+ * same register the test is delegated to maybe_emit_tst(), which
+ * receives a pointer to the polarity and may update it.
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+    jit_bool_t polarity = 1;
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (r0 == r1) {
+        maybe_emit_tst(_jit, r0, &polarity);
+    } else {
+        TST(r0, r1);
+    }
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, polarity, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit a comparison of r0 against the immediate i1 followed by a
+ * conditional branch to i0.
+ *
+ * A zero immediate uses the single-operand CMPPL form. Any other value
+ * is loaded into R0 and compared with CMPGT, so r0 must not be R0 in
+ * that case. 'set' and 'p' are forwarded to emit_branch_opcode().
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (i1 != 0) {
+        assert(r0 != _R0);
+
+        movi(_R0, i1);
+        CMPGT(r0, _R0);
+    } else {
+        CMPPL(r0);
+    }
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit a comparison of r0 against the immediate i1 followed by a
+ * conditional branch to i0.
+ *
+ * A zero immediate uses the single-operand CMPPZ form. Any other value
+ * is loaded into R0 and compared with CMPGE, so r0 must not be R0 in
+ * that case. 'set' and 'p' are forwarded to emit_branch_opcode().
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (i1 != 0) {
+        assert(r0 != _R0);
+
+        movi(_R0, i1);
+        CMPGE(r0, _R0);
+    } else {
+        CMPPZ(r0);
+    }
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit an unsigned "greater than immediate" comparison of r0 against i1
+ * followed by a conditional branch to i0.
+ *
+ * 'set' is the value of the T flag on which the branch is taken, and 'p'
+ * is forwarded to emit_branch_opcode(). In the general case the
+ * immediate is loaded into R0 (so r0 must not be R0) and CMPHI produces
+ * T = (r0 > i1).
+ *
+ * With a zero immediate, "r0 > 0" is the same as "r0 != 0", which is
+ * checked by testing r0 against itself. That test yields T = (r0 == 0),
+ * the opposite of what CMPHI would have produced, so the requested
+ * polarity is inverted before it is handed to maybe_emit_tst().
+ * NOTE(review): this relies on maybe_emit_tst() having the semantics
+ * implied by its other callers (T set when the register is zero) --
+ * confirm against its definition.
+ *
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (i1 == 0) {
+        /* T will mean "r0 == 0": flip the polarity to get "r0 != 0" */
+        set = !set;
+        maybe_emit_tst(_jit, r0, &set);
+    } else {
+        assert(r0 != _R0);
+
+        movi(_R0, i1);
+        CMPHI(r0, _R0);
+    }
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+
+/*
+ * Emit a CMPHS comparison of r0 against the immediate i1 followed by a
+ * conditional branch to i0. The immediate is loaded into R0, so r0 must
+ * not be R0. 'set' and 'p' are forwarded to emit_branch_opcode().
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    assert(r0 != _R0);
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    /* R0 <- immediate, then compare against it */
+    movi(_R0, i1);
+    CMPHS(r0, _R0);
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit an equality comparison of r0 against the immediate i1 followed
+ * by a conditional branch to i0.
+ *
+ * Three encodings are used:
+ *  - i1 == 0: the test is delegated to maybe_emit_tst(), which may
+ *    update 'set';
+ *  - i1 in [-128, 127]: r0 is copied to R0 and CMPEQI is used;
+ *  - otherwise: i1 is loaded into R0 (so r0 must not be R0) and CMPEQ
+ *    compares the two registers.
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (i1 == 0) {
+        maybe_emit_tst(_jit, r0, &set);
+    } else if (i1 < -128 || i1 >= 128) {
+        assert(r0 != _R0);
+
+        movi(_R0, i1);
+        CMPEQ(_R0, r0);
+    } else {
+        /* Immediate fits the short compare form */
+        movr(_R0, r0);
+        CMPEQI(i1);
+    }
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit a bit test of r0 against the immediate mask i1 followed by a
+ * conditional branch to i0. The mask is loaded into R0, so r0 must not
+ * be R0. 'set' and 'p' are forwarded to emit_branch_opcode().
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    assert(r0 != _R0);
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    /* R0 <- mask, then test it against r0 */
+    movi(_R0, i1);
+    TST(_R0, r0);
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit ADDV on r0 and r1 followed by a conditional branch to i0.
+ * 'set' and 'p' are forwarded to emit_branch_opcode().
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+    ADDV(r0, r1);
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit CLRT then ADDC on r0 and r1, followed by a conditional branch to
+ * i0. 'set' and 'p' are forwarded to emit_branch_opcode().
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+          jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    /* Clear T before the add-with-carry */
+    CLRT();
+    ADDC(r0, r1);
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Immediate form of _boaddr(): i1 is loaded into R0 and the register
+ * variant is invoked with R0 as second operand. r0 must not be R0.
+ * Returns whatever _boaddr() returns.
+ */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    assert(r0 != _R0);
+
+    movi(_R0, i1);
+
+    return _boaddr(_jit, i0, r0, _R0, set, p);
+}
+
+/*
+ * Immediate form of _boaddr_u(): i1 is loaded into R0 and the register
+ * variant is invoked with R0 as second operand. r0 must not be R0.
+ * Returns whatever _boaddr_u() returns.
+ */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+          jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    assert(r0 != _R0);
+
+    movi(_R0, i1);
+
+    return _boaddr_u(_jit, i0, r0, _R0, set, p);
+}
+
+/*
+ * Emit a subtraction of r1 from r0 followed by a conditional branch to
+ * i0. The subtraction is built as NEG of r1 into R0, then ADDV of R0
+ * into r0, so r0 must not be R0. 'set' and 'p' are forwarded to
+ * emit_branch_opcode(). Returns the code address of the branch.
+ *
+ * NOTE(review): negating first presumably changes the outcome when r1
+ * holds the most negative value, since its negation is itself -- confirm
+ * whether a direct subtract-with-overflow opcode should be used here.
+ */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    assert(r0 != _R0);
+
+    /* r0 - r1 expressed as r0 + (-r1) */
+    NEG(_R0, r1);
+    ADDV(r0, _R0);
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Emit CLRT then SUBC on r0 and r1, followed by a conditional branch to
+ * i0. 'set' and 'p' are forwarded to emit_branch_opcode().
+ * Returns the code address of the branch.
+ */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+          jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t branch_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    /* Clear T before the subtract-with-carry */
+    CLRT();
+    SUBC(r0, r1);
+
+    branch_at = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, branch_at, set, p);
+
+    return branch_at;
+}
+
+/*
+ * Immediate form of _bosubr(): i1 is loaded into R0 and the register
+ * variant is invoked with R0 as second operand. r0 must not be R0.
+ * Returns whatever _bosubr() returns.
+ */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    assert(r0 != _R0);
+
+    movi(_R0, i1);
+
+    return _bosubr(_jit, i0, r0, _R0, set, p);
+}
+
+/*
+ * Immediate form of _bosubr_u(): i1 is loaded into R0 and the register
+ * variant is invoked with R0 as second operand. r0 must not be R0.
+ * Returns whatever _bosubr_u() returns.
+ */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+          jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    assert(r0 != _R0);
+
+    movi(_R0, i1);
+
+    return _bosubr_u(_jit, i0, r0, _R0, set, p);
+}
+
+/*
+ * Emit an indirect jump through r0: the FPU mode is first set to the
+ * default, then JMP is emitted followed by a NOP.
+ */
+static void
+_jmpr(jit_state_t *_jit, jit_int16_t r0)
+{
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    JMP(r0);
+    /* Presumably fills the delay slot of the jump */
+    NOP();
+}
+
+/*
+ * Emit an unconditional jump to the address i0.
+ *
+ * When the displacement from the current position fits the range
+ * accepted below, or when 'force' is set, a BRA followed by a NOP is
+ * emitted. Otherwise the target address is loaded into a temporary
+ * register and an indirect jump is emitted through it.
+ *
+ * Returns the code address at which the jump sequence starts.
+ */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t force)
+{
+    jit_uint16_t reg;
+    jit_int32_t disp;
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    w = _jit->pc.w;
+    /* Displacement in 16-bit units, relative to the branch + 4 bytes */
+    disp = ((i0 - w) >> 1) - 2;
+
+    if (force || (disp >= -2048 && disp <= 2046)) {
+        BRA(disp);
+        NOP();
+    } else {
+        /* TODO: use a PC-relative BRAF here instead of an absolute jump */
+        reg = jit_get_reg(jit_class_gpr);
+
+        movi(rn(reg), i0);
+        jmpr(rn(reg));
+
+        jit_unget_reg(reg);
+    }
+
+    return (w);
+}
+
+/*
+ * Emit an indirect call through r0: JSR followed by a NOP, bracketed by
+ * two reset_fpu() calls. The first one receives a flag telling whether
+ * the call target lives in R0; the second one is passed 1.
+ */
+static void
+_callr(jit_state_t *_jit, jit_int16_t r0)
+{
+    reset_fpu(_jit, r0 == _R0);
+
+    JSR(r0);
+    /* Presumably fills the delay slot of the call */
+    NOP();
+
+    reset_fpu(_jit, 1);
+}
+
+/*
+ * Emit a call to the absolute address i0.
+ *
+ * When the displacement from the current position fits the range
+ * accepted below, a BSR is emitted. Otherwise the address is loaded into
+ * R0 and a JSR through R0 is emitted. Either way the call opcode is
+ * followed by a NOP, and the sequence is bracketed by reset_fpu() calls.
+ */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t disp;
+    jit_word_t w;
+
+    reset_fpu(_jit, 0);
+
+    w = _jit->pc.w;
+    /* Displacement in 16-bit units, relative to the branch + 4 bytes */
+    disp = ((i0 - w) >> 1) - 2;
+
+    if (disp >= -2048 && disp <= 2046) {
+        BSR(disp);
+    } else {
+        movi(_R0, i0);
+        JSR(_R0);
+    }
+
+    /* Emitted right after the call opcode on both paths */
+    NOP();
+    reset_fpu(_jit, 1);
+}
+
+/*
+ * Emit a patchable constant load into r0 and return the code address at
+ * which it starts.
+ *
+ * Note that i0 is not used here: load_const() is always called with the
+ * arguments (1, r0, 0). Presumably the real value is written afterwards
+ * by the patching code -- confirm against the callers.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+    const jit_word_t start = _jit->pc.w;
+
+    load_const(1, r0, 0);
+
+    return start;
+}
+
+/*
+ * Emit a patchable unconditional jump: the target i0 is loaded into a
+ * temporary register with movi_p(), then an indirect jump is emitted
+ * through that register.
+ * Returns the value returned by movi_p().
+ */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_uint16_t tmp;
+    jit_word_t load_at;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    tmp = jit_get_reg(jit_class_gpr);
+
+    load_at = movi_p(rn(tmp), i0);
+    jmpr(rn(tmp));
+
+    jit_unget_reg(tmp);
+
+    return load_at;
+}
+
+/*
+ * Emit a patchable call: the target i0 is loaded into a temporary
+ * register with movi_p(), then JSR through that register is emitted,
+ * followed by a NOP. The sequence is bracketed by reset_fpu() calls.
+ * Returns the value returned by movi_p().
+ */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_uint16_t tmp;
+    jit_word_t load_at;
+
+    reset_fpu(_jit, 0);
+
+    tmp = jit_get_reg(jit_class_gpr);
+
+    load_at = movi_p(rn(tmp), i0);
+    JSR(rn(tmp));
+    NOP();
+
+    jit_unget_reg(tmp);
+
+    reset_fpu(_jit, 1);
+
+    return load_at;
+}
+
+/*
+ * Emit the code initializing a jit_va_list_t and leave its address in
+ * r0. Only valid inside a function flagged as varargs.
+ *
+ * The structure lives at (FP + vaoff), rounded down to 8 bytes and then
+ * offset by 4. Its bgpr/egpr, bfpr/efpr and over fields are filled, in
+ * that order, using a single temporary register that is advanced from
+ * one value to the next.
+ */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t tmp;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* The register argument receives the jit_va_list_t pointer */
+    addi(r0, JIT_FP, _jitc->function->vaoff);
+    tmp = jit_get_reg(jit_class_gpr);
+
+    /* Align the pointer to 8 bytes with a +4 bytes offset (so that the
+     * double values are aligned to 8 bytes) */
+    andi(r0, r0, -8);
+    addi(r0, r0, 4);
+
+    /* gpr begin pointer: skips the structure and the vagp used words */
+    addi(rn(tmp), r0, sizeof(jit_va_list_t)
+         + _jitc->function->vagp * sizeof(jit_uint32_t));
+    stxi(offsetof(jit_va_list_t, bgpr), r0, rn(tmp));
+
+    /* gpr end pointer */
+    addi(rn(tmp), rn(tmp), NUM_WORD_ARGS * sizeof(jit_word_t)
+         - _jitc->function->vagp * sizeof(jit_uint32_t));
+    stxi(offsetof(jit_va_list_t, egpr), r0, rn(tmp));
+
+    /* fpr begin pointer: only moved when vafp is non-zero */
+    if (_jitc->function->vafp) {
+        addi(rn(tmp), rn(tmp),
+             _jitc->function->vafp * sizeof(jit_float32_t));
+    }
+    stxi(offsetof(jit_va_list_t, bfpr), r0, rn(tmp));
+
+    /* fpr end pointer */
+    addi(rn(tmp), rn(tmp), NUM_FLOAT_ARGS * sizeof(jit_float32_t)
+         - _jitc->function->vafp * sizeof(jit_float32_t));
+    stxi(offsetof(jit_va_list_t, efpr), r0, rn(tmp));
+
+    /* Pointer to the first stack argument */
+    addi(rn(tmp), JIT_FP, _jitc->function->self.size);
+    stxi(offsetof(jit_va_list_t, over), r0, rn(tmp));
+
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit the code fetching the next 32-bit variadic argument into r0,
+ * where r1 holds the address of the jit_va_list_t. Only valid inside a
+ * function flagged as varargs.
+ *
+ * The generated code compares the bgpr and egpr pointers; if the
+ * comparison says the gpr area is exhausted, the 'over' pointer is used
+ * instead. R0 is left holding the offset of whichever field was read, so
+ * the final store writes the advanced pointer back to that same field.
+ */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t cur, end;
+    jit_word_t skip_at;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    cur = jit_get_reg(jit_class_gpr);
+    end = jit_get_reg(jit_class_gpr);
+
+    /* Fetch the end pointer, then the begin pointer (offset kept in R0) */
+    ldxi(rn(end), r1, offsetof(jit_va_list_t, egpr));
+    movi(_R0, offsetof(jit_va_list_t, bgpr));
+    ldxr(rn(cur), r1, _R0);
+
+    /* Has the begin pointer reached the end pointer? */
+    CMPHS(rn(cur), rn(end));
+
+    /* Placeholder branch, patched below to skip the stack fallback */
+    skip_at = _jit->pc.w;
+    BF(0);
+
+    /* Fallback: switch to the stack pointer field */
+    movi(_R0, offsetof(jit_va_list_t, over));
+    ldxr(rn(cur), r1, _R0);
+
+    patch_at(skip_at, _jit->pc.w);
+
+    /* Read the value and advance the pointer by one word */
+    ldxai_i(r0, rn(cur), sizeof(jit_uint32_t));
+
+    /* Write the advanced pointer back at the offset still held in R0 */
+    stxr(_R0, r1, rn(cur));
+
+    jit_unget_reg(cur);
+    jit_unget_reg(end);
+}
+
+/*
+ * Patch a 32-bit value into an already emitted instruction sequence
+ * starting at 'instr'. The four bytes of 'label' are written, most
+ * significant first, into the immediate field of the instructions at
+ * indexes 0, 2, 4 and 6; the instructions in between are left alone.
+ */
+static void
+_patch_abs(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_instr_t *insn = (jit_instr_t *)instr;
+    int slot, shift;
+
+    for (slot = 0, shift = 24; shift >= 0; slot += 2, shift -= 8)
+        insn[slot].ni.i = (label >> shift) & 0xff;
+}
+
+/*
+ * Patch the instruction located at 'instr' so that it refers to 'label'.
+ *
+ * The kind of patch is selected from the top nibble of the opcode:
+ *  - 0xe: absolute value spread over several instructions, handled by
+ *    patch_abs();
+ *  - 0xc: unsigned 8-bit displacement in 32-bit units, relative to the
+ *    4-byte aligned instruction address;
+ *  - 0xa: BRA with a signed displacement in 16-bit units;
+ *  - 0x8: BT/BF/BTS/BFS conditional branch with an 8-bit displacement.
+ *    When the target is out of reach the condition is inverted and the
+ *    following instruction slot is overwritten with a BRA;
+ *  - 0xd: PC-relative mov.l (see the comments in that case).
+ *
+ * Any other opcode triggers an assertion failure.
+ */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_instr_t *ptr = (jit_instr_t *)instr;
+    jit_int32_t disp;
+
+    switch (ptr->nmd.c) {
+    case 0xe:
+        patch_abs(instr, label);
+        break;
+    case 0xc:
+        disp = ((label - (instr & ~0x3)) >> 2) - 1;
+        assert(disp >= 0 && disp <= 255);
+        ptr->ni.i = disp;
+        break;
+    case 0xa:
+        disp = ((label - instr) >> 1) - 2;
+        assert(disp >= -2048 && disp <= 2046);
+        ptr->d.d = disp;
+        break;
+    case 0x8:
+        switch (ptr->ni.n) {
+        case 0x9:
+        case 0xb:
+        case 0xd:
+        case 0xf:
+            disp = ((label - instr) >> 1) - 2;
+            if (disp >= -128 && disp <= 127) {
+                ptr->ni.i = disp;
+            } else {
+                /* The BRA written below has a 12-bit signed
+                 * displacement; anything larger would be truncated
+                 * silently. */
+                assert(disp - 1 >= -2048 && disp - 1 <= 2047);
+
+                /* Invert bit 1: BT(S) <-> BF(S) */
+                ptr->ni.n ^= 1 << 1;
+
+                /* Opcode 2 is now a BRA opcode */
+                ptr[1].d = (struct jit_instr_d){ .c = 0xa, .d = disp - 1 };
+            }
+            break;
+        default:
+            assert(!"unhandled branch opcode");
+        }
+        break;
+    case 0xd:
+        if (ptr->op & 0xff) {
+            /* TODO: Fix the mess. patch_at() gets called with 'instr'
+             * pointing to the mov.l opcode and 'label' being the value
+             * that should be loaded into the register. So we read the
+             * address at which the mov.l points to, and write the label
+             * there. */
+            *(jit_uint32_t *)((instr & ~0x3) + 4 + (ptr->op & 0xff) * 4) = label;
+        } else {
+            /* Displacement still zero: make the mov.l point at 'label' */
+            disp = ((label - instr) >> 2) - 1 + !!(instr & 0x3);
+            ptr->op = (ptr->op & 0xff00) | disp;
+        }
+        break;
+    default:
+        /* The '!' matters: a bare string literal is always true and
+         * would make this assertion a no-op. */
+        assert(!"unhandled branch opcode");
+    }
+}
+
+/*
+ * Emit the function prolog.
+ *
+ * When the function only assumes a frame, nothing is emitted. Otherwise
+ * the stack size is computed (aligned to 8 bytes), a save area of
+ * stack_framesize bytes is reserved, and FP, the value read with
+ * STSPR() and the callee-saved registers in use are stored in it. FP is
+ * then set to SP and the local stack is reserved. For varargs functions
+ * the remaining argument registers are spilled next to the
+ * jit_va_list_t area.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_uint16_t reg, regno;
+
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+        jit_int32_t frame = -_jitc->function->frame;
+        assert(_jitc->function->self.aoff >= frame);
+        if (_jitc->function->assume_frame)
+            return;
+        _jitc->function->self.aoff = frame;
+    }
+
+    if (_jitc->function->allocar)
+        _jitc->function->self.aoff &= -8;
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                              /* align stack at 8 bytes */
+                              _jitc->function->self.aoff) + 7) & -8;
+
+    /* Reserve the save area and store the old frame pointer */
+    ADDI(JIT_SP, -stack_framesize);
+    STDL(JIT_SP, JIT_FP, JIT_V_NUM + 1);
+
+    /* Saved through R0, in the slot just below the frame pointer's */
+    STSPR(_R0);
+    STDL(JIT_SP, _R0, JIT_V_NUM);
+
+    /* Only the callee-saved registers marked in the regset are stored */
+    for (regno = 0; regno < JIT_V_NUM; regno++)
+        if (jit_regset_tstbit(&_jitc->function->regset, JIT_V(regno)))
+            STDL(JIT_SP, JIT_V(regno), regno);
+
+    movr(JIT_FP, JIT_SP);
+
+    if (_jitc->function->stack)
+        subi(JIT_SP, JIT_SP, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), _jitc->function->self.aoff);
+        stxi_i(_jitc->function->aoffoff, JIT_FP, rn(reg));
+        jit_unget_reg(reg);
+    }
+
+    if (_jitc->function->self.call & jit_call_varargs) {
+        /* Align to 8 bytes with a +4 bytes offset (so that the double
+         * values are aligned to 8 bytes) */
+        andi(JIT_R0, JIT_FP, -8);
+        addi(JIT_R0, JIT_R0, 4);
+
+        for (regno = _jitc->function->vagp; jit_arg_reg_p(regno); regno++) {
+            stxi(_jitc->function->vaoff
+                 + sizeof(jit_va_list_t)
+                 + regno * sizeof(jit_word_t),
+                 JIT_R0, rn(_R4 + regno));
+        }
+
+        for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); regno++) {
+            stxi_f(_jitc->function->vaoff
+                   + sizeof(jit_va_list_t)
+                   + NUM_WORD_ARGS * sizeof(jit_word_t)
+                   + regno * sizeof(jit_float32_t),
+                   JIT_R0, rn(_F4 + (regno ^ fpr_args_inverted())));
+        }
+    }
+
+    reset_fpu(_jit, 0);
+}
+
+/*
+ * Emit the function epilog.
+ *
+ * Nothing is emitted when the function only assumes a frame. Otherwise
+ * SP is restored from FP, the callee-saved registers marked in the
+ * regset are reloaded from the highest index down to the lowest, the
+ * slot at index JIT_V_NUM is reloaded and handed to LDSPR() through FP,
+ * FP itself is reloaded, and RTS is emitted followed by the instruction
+ * releasing the save area.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    unsigned int idx;
+
+    if (_jitc->function->assume_frame)
+        return;
+
+    reset_fpu(_jit, 1);
+
+    movr(JIT_SP, JIT_FP);
+
+    idx = JIT_V_NUM;
+    while (idx > 0) {
+        idx--;
+        if (jit_regset_tstbit(&_jitc->function->regset, JIT_V(idx)))
+            LDDL(JIT_V(idx), JIT_SP, idx);
+    }
+
+    /* FP is used as a scratch register here, before being reloaded */
+    LDDL(JIT_FP, JIT_SP, JIT_V_NUM);
+    LDSPR(JIT_FP);
+
+    LDDL(JIT_FP, JIT_SP, JIT_V_NUM + 1);
+    RTS();
+    /* Emitted after RTS, presumably to sit in its delay slot */
+    ADDI(JIT_SP, stack_framesize);
+}
+#endif /* CODE */
generated by cgit v1.2.3 (git 2.39.1) at 2025年09月14日 15:26:55 +0000

AltStyle によって変換されたページ (->オリジナル) /