-rw-r--r-- | check/float.tst | 4 | ||||
-rw-r--r-- | check/lightning.c | 5 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | include/lightning.h.in | 2 | ||||
-rw-r--r-- | include/lightning/Makefile.am | 4 | ||||
-rw-r--r-- | include/lightning/jit_private.h | 30 | ||||
-rw-r--r-- | include/lightning/jit_sh.h | 83 | ||||
-rw-r--r-- | lib/Makefile.am | 3 | ||||
-rw-r--r-- | lib/jit_disasm.c | 6 | ||||
-rw-r--r-- | lib/jit_sh-cpu.c | 3209 | ||||
-rw-r--r-- | lib/jit_sh-fpu.c | 2394 | ||||
-rw-r--r-- | lib/jit_sh-sz.c | 598 | ||||
-rw-r--r-- | lib/jit_sh.c | 2215 | ||||
-rw-r--r-- | lib/jit_size.c | 2 | ||||
-rw-r--r-- | lib/lightning.c | 2 |
diff --git a/check/float.tst b/check/float.tst index 3465780..a181f84 100644 --- a/check/float.tst +++ b/check/float.tst @@ -14,14 +14,14 @@ ok: # define x80 0x8000000000000000 #endif -#if (__mips__ && __mips_isa_rev < 6) || __sparc__ || __hppa__ || __riscv +#if (__mips__ && __mips_isa_rev < 6) || __sparc__ || __hppa__ || __riscv || __sh__ # define wnan x7f #elif (__mips__ && __mips_isa_rev >= 6) || __arm__ || __aarch64__ || __alpha__ || __loongarch__ # define wnan 0 #else # define wnan x80 #endif -#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__ || __s390__ || __riscv || __loongarch__ +#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__ || __s390__ || __riscv || __loongarch__ || __sh__ # define wpinf x7f #elif __alpha__ /* (at least) bug compatible with gcc 4.2.3 -ieee */ diff --git a/check/lightning.c b/check/lightning.c index 443a5b4..d485b5c 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -4860,6 +4860,11 @@ main(int argc, char *argv[]) sizeof(cmdline) - opt_short, " -D__loongarch__=1"); #endif +#if defined(__sh__) + opt_short += snprintf(cmdline + opt_short, + sizeof(cmdline) - opt_short, + " -D__sh__=1"); +#endif if ((parser.fp = popen(cmdline, "r")) == NULL) error("cannot execute %s", cmdline); diff --git a/configure.ac b/configure.ac index b35bcf7..31594ad 100644 --- a/configure.ac +++ b/configure.ac @@ -222,6 +222,7 @@ case "$target_cpu" in alpha*) cpu=alpha ;; riscv*) cpu=riscv ;; loongarch*) cpu=loongarch ;; + sh*) cpu=sh ;; *) ;; esac AM_CONDITIONAL(cpu_arm, [test cpu-$cpu = cpu-arm]) @@ -236,6 +237,7 @@ AM_CONDITIONAL(cpu_s390, [test cpu-$cpu = cpu-s390]) AM_CONDITIONAL(cpu_alpha, [test cpu-$cpu = cpu-alpha]) AM_CONDITIONAL(cpu_riscv, [test cpu-$cpu = cpu-riscv]) AM_CONDITIONAL(cpu_loongarch, [test cpu-$cpu = cpu-loongarch]) +AM_CONDITIONAL(cpu_sh, [test cpu-$cpu = cpu-sh]) # Test x87 if both, x87 and sse2 available ac_cv_test_x86_x87= diff --git a/include/lightning.h.in 
b/include/lightning.h.in index ad0e633..8fb270a 100644 --- a/include/lightning.h.in +++ b/include/lightning.h.in @@ -154,6 +154,8 @@ typedef jit_int32_t jit_fpr_t; # include <lightning/jit_riscv.h> #elif defined(__loongarch__) # include <lightning/jit_loongarch.h> +#elif defined(__sh__) +# include <lightning/jit_sh.h> #endif #define jit_flag_node 0x0001 /* patch node not absolute */ diff --git a/include/lightning/Makefile.am b/include/lightning/Makefile.am index e21bbaa..6d3944a 100644 --- a/include/lightning/Makefile.am +++ b/include/lightning/Makefile.am @@ -67,3 +67,7 @@ if cpu_loongarch lightning_include_HEADERS = \ jit_loongarch.h endif +if cpu_sh +lightning_include_HEADERS = \ + jit_sh.h +endif diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 70c2d7d..d350bab 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -177,6 +177,13 @@ typedef jit_uint64_t jit_regset_t; # define JIT_RET _A0 # define JIT_FRET _FA0 typedef jit_uint64_t jit_regset_t; +#elif defined(__sh__) +# define JIT_RA0 _R4 +# define JIT_FA0 _XF4 +# define JIT_SP _R15 +# define JIT_RET _R0 +# define JIT_FRET _XF0 +typedef jit_uint32_t jit_regset_t; #endif #define jit_data(u,v,w) _jit_data(_jit,u,v,w) @@ -447,7 +454,7 @@ typedef struct jit_value jit_value_t; typedef struct jit_compiler jit_compiler_t; typedef struct jit_function jit_function_t; typedef struct jit_register jit_register_t; -#if __arm__ +#if __arm__ || __sh__ # if DISASSEMBLER typedef struct jit_data_info jit_data_info_t; # endif @@ -522,7 +529,7 @@ typedef struct { jit_node_t *node; } jit_patch_t; -#if __arm__ && DISASSEMBLER +#if (__arm__ || __sh__) && DISASSEMBLER struct jit_data_info { jit_uword_t code; /* pointer in code buffer */ jit_word_t length; /* length of constant vector */ @@ -748,6 +755,25 @@ struct jit_compiler { jit_word_t length; /* length of instrs/values vector */ } vector; } consts; +#elif defined(__sh__) +# if DISASSEMBLER + struct { + 
jit_data_info_t *ptr; + jit_word_t offset; + jit_word_t length; + } data_info; /* constant pools information */ +# endif + jit_bool_t mode_d; + jit_bool_t no_flag; + jit_bool_t uses_fpu; + struct { + jit_uint8_t *data; /* pointer to code */ + jit_word_t size; /* size data */ + jit_word_t offset; /* pending patches */ + jit_word_t length; /* number of pending constants */ + jit_int32_t values[1024]; /* pending constants */ + jit_word_t patches[2048]; + } consts; #endif #if GET_JIT_SIZE /* Temporary storage to calculate instructions length */ diff --git a/include/lightning/jit_sh.h b/include/lightning/jit_sh.h new file mode 100644 index 0000000..25ba582 --- /dev/null +++ b/include/lightning/jit_sh.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2020 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paul Cercueil + */ + +#ifndef _jit_sh_h +#define _jit_sh_h + +#define JIT_HASH_CONSTS 0 +#define JIT_NUM_OPERANDS 2 + +typedef enum { +#define jit_r(i) (JIT_R0 + (i)) +#define jit_r_num() 3 +#define jit_v(i) (JIT_V0 + (i)) +#define jit_v_num() 6 +#define jit_f(i) (JIT_F0 - (i) * 2) +#ifdef __SH_FPU_ANY__ +# define jit_f_num() 8 +#else +# define jit_f_num() 0 +#endif + _R0, + + /* caller-saved temporary registers */ +#define JIT_R0 _R1 +#define JIT_R1 _R2 +#define JIT_R2 _R3 + _R1, _R2, _R3, + + /* argument registers */ + _R4, _R5, _R6, _R7, + + /* callee-saved registers */ +#define JIT_V0 _R8 +#define JIT_V1 _R9 +#define JIT_V2 _R10 +#define JIT_V3 _R11 +#define JIT_V4 _R12 +#define JIT_V5 _R13 + _R8, _R9, _R10, _R11, _R12, _R13, + +#define JIT_FP _R14 + _R14, + _R15, + + _GBR, + + /* floating-point registers */ +#define JIT_F0 _F14 +#define JIT_F1 _F12 +#define JIT_F2 _F10 +#define JIT_F3 _F8 +#define JIT_F4 _F6 +#define JIT_F5 _F4 +#define JIT_F6 _F2 +#define JIT_F7 _F0 + _F0, _F1, _F2, _F3, _F4, _F5, _F6, _F7, + _F8, _F9, _F10, _F11, _F12, _F13, _F14, _F15, + + /* Banked floating-point registers */ + _XF0, _XF1, _XF2, _XF3, _XF4, _XF5, _XF6, _XF7, + _XF8, _XF9, _XF10, _XF11, _XF12, _XF13, _XF14, _XF15, + +#define JIT_NOREG _NOREG + _NOREG, +} jit_reg_t; + +#endif /* _jit_sh_h */ diff --git a/lib/Makefile.am b/lib/Makefile.am index d10bc9a..2343709 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -83,6 +83,9 @@ EXTRA_DIST = \ jit_s390-cpu.c \ jit_s390-fpu.c \ jit_s390-sz.c \ + jit_sh.c \ + jit_sh-cpu.c \ + jit_sh-sz.c \ jit_sparc.c \ jit_sparc-cpu.c \ jit_sparc-fpu.c \ diff --git a/lib/jit_disasm.c b/lib/jit_disasm.c index 90d90b0..7866f2e 100644 --- a/lib/jit_disasm.c +++ b/lib/jit_disasm.c @@ -112,6 +112,12 @@ jit_init_debug(const char *progname, FILE *stream) # if defined(__s390__) || defined(__s390x__) disasm_info.disassembler_options = "zarch"; # endif +# if defined(__sh__) + disasm_info.arch = bfd_arch_sh; + disasm_info.mach = 
bfd_mach_sh4; + disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_LITTLE; +# endif + disasm_info.print_address_func = disasm_print_address; # if BINUTILS_2_29 diff --git a/lib/jit_sh-cpu.c b/lib/jit_sh-cpu.c new file mode 100644 index 0000000..cfb8b26 --- /dev/null +++ b/lib/jit_sh-cpu.c @@ -0,0 +1,3209 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paul Cercueil + */ + +#if PROTO + +# ifdef __SH4_SINGLE__ +# define SH_DEFAULT_FPU_MODE 0 +# else +# define SH_DEFAULT_FPU_MODE 1 +# endif + +# ifndef SH_HAS_FPU +# ifdef __SH_FPU_ANY__ +# define SH_HAS_FPU 1 +# else +# define SH_HAS_FPU 0 +# endif +# endif + +# ifdef __SH4_SINGLE_ONLY__ +# define SH_SINGLE_ONLY 1 +# else +# define SH_SINGLE_ONLY 0 +# endif + + +struct jit_instr_ni { +#if __BYTE_ORDER == __LITTLE_ENDIAN + jit_uint16_t i :8; + jit_uint16_t n :4; + jit_uint16_t c :4; +#else + jit_uint16_t c :4; + jit_uint16_t n :4; + jit_uint16_t i :8; +#endif +}; + +struct jit_instr_nmd { +#if __BYTE_ORDER == __LITTLE_ENDIAN + jit_uint16_t d :4; + jit_uint16_t m :4; + jit_uint16_t n :4; + jit_uint16_t c :4; +#else + jit_uint16_t c :4; + jit_uint16_t n :4; + jit_uint16_t m :4; + jit_uint16_t d :4; +#endif +}; + +struct jit_instr_md { +#if __BYTE_ORDER == __LITTLE_ENDIAN + jit_uint16_t d :4; + jit_uint16_t m :4; + jit_uint16_t c :8; +#else + jit_uint16_t c :8; + jit_uint16_t m :4; + jit_uint16_t d :4; +#endif +}; + +struct 
jit_instr_d { +#if __BYTE_ORDER == __LITTLE_ENDIAN + jit_uint16_t d :12; + jit_uint16_t c :4; +#else + jit_uint16_t c :4; + jit_uint16_t d :12; +#endif +}; + +typedef union { + struct jit_instr_ni ni; + struct jit_instr_nmd nmd; + struct jit_instr_md md; + struct jit_instr_d d; + jit_uint16_t op; +} jit_instr_t; + +static void _cni(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +static void +_cnmd(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_uint16_t); +static void _cmd(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +static void _cd(jit_state_t*,jit_uint16_t,jit_uint16_t); + +# define STRB(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x4) +# define STRW(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x5) +# define STRL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x6) +# define MULL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x7) +# define LDRB(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xc) +# define LDRW(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xd) +# define LDRL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xe) +# define BSRF(rn) _cni(_jit, 0x0, rn, 0x03) +# define STCGBR(rn) _cni(_jit, 0x0, rn, 0x12) +# define STSH(rn) _cni(_jit, 0x0, rn, 0x0a) +# define STSL(rn) _cni(_jit, 0x0, rn, 0x1a) +# define BRAF(rn) _cni(_jit, 0x0, rn, 0x23) +# define MOVT(rn) _cni(_jit, 0x0, rn, 0x29) + +# define STSPR(rn) _cni(_jit, 0x0, rn, 0x2a) +# define STSUL(rn) _cni(_jit, 0x0, rn, 0x5a) +# define STSFP(rn) _cni(_jit, 0x0, rn, 0x6a) + +# define STDL(rn, rm, imm) _cnmd(_jit, 0x1, rn, rm, imm) + +# define STB(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x0) +# define STW(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x1) +# define STL(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x2) +# define STBU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x4) +# define STWU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x5) +# define STLU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x6) +# define DIV0S(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x7) +# define TST(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x8) +# define AND(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x9) +# define XOR(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0xa) +# define OR(rn, rm) 
_cnmd(_jit, 0x2, rn, rm, 0xb) + +# define CMPEQ(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x0) +# define CMPHS(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x2) +# define CMPGE(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x3) +# define DIV1(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x4) +# define DMULU(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x5) +# define CMPHI(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x6) +# define CMPGT(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x7) +# define SUB(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x8) +# define SUBC(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xa) +# define SUBV(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xb) +# define ADD(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xc) +# define ADDC(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xe) +# define ADDV(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xf) +# define DMULS(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xd) + +# define SHLL(rn) _cni(_jit, 0x4, rn, 0x00) +# define SHLR(rn) _cni(_jit, 0x4, rn, 0x01) +# define ROTL(rn) _cni(_jit, 0x4, rn, 0x04) +# define ROTR(rn) _cni(_jit, 0x4, rn, 0x05) +# define SHLL2(rn) _cni(_jit, 0x4, rn, 0x08) +# define SHLR2(rn) _cni(_jit, 0x4, rn, 0x09) +# define JSR(rn) _cni(_jit, 0x4, rn, 0x0b) +# define DT(rn) _cni(_jit, 0x4, rn, 0x10) +# define CMPPZ(rn) _cni(_jit, 0x4, rn, 0x11) +# define CMPPL(rn) _cni(_jit, 0x4, rn, 0x15) +# define SHLL8(rn) _cni(_jit, 0x4, rn, 0x18) +# define SHLR8(rn) _cni(_jit, 0x4, rn, 0x19) +# define TAS(rn) _cni(_jit, 0x4, rn, 0x1b) +# define LDCGBR(rm) _cni(_jit, 0x4, rm, 0x1e) +# define SHAL(rn) _cni(_jit, 0x4, rn, 0x20) +# define SHAR(rn) _cni(_jit, 0x4, rn, 0x21) +# define ROTCL(rn) _cni(_jit, 0x4, rn, 0x24) +# define ROTCR(rn) _cni(_jit, 0x4, rn, 0x25) +# define SHLL16(rn) _cni(_jit, 0x4, rn, 0x28) +# define SHLR16(rn) _cni(_jit, 0x4, rn, 0x29) +# define LDSPR(rn) _cni(_jit, 0x4, rn, 0x2a) +# define JMP(rn) _cni(_jit, 0x4, rn, 0x2b) +# define LDS(rn) _cni(_jit, 0x4, rn, 0x5a) +# define LDSFP(rn) _cni(_jit, 0x4, rn, 0x6a) +# define SHAD(rn, rm) _cnmd(_jit, 0x4, rn, rm, 0xc) +# define SHLD(rn, rm) _cnmd(_jit, 0x4, rn, rm, 0xd) + +# define LDDL(rn, 
rm, imm) _cnmd(_jit, 0x5, rn, rm, imm) + +# define LDB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x0) +# define LDW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x1) +# define LDL(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x2) +# define MOV(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x3) +# define LDBU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x4) +# define LDWU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x5) +# define LDLU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x6) +# define NOT(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x7) +# define SWAPB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x8) +# define SWAPW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x9) +# define NEGC(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xa) +# define NEG(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xb) +# define EXTUB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xc) +# define EXTUW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xd) +# define EXTSB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xe) +# define EXTSW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xf) + +# define ADDI(rn, imm) _cni(_jit, 0x7, rn, imm) + +# define LDDB(rm, imm) _cnmd(_jit, 0x8, 0x4, rm, imm) +# define LDDW(rm, imm) _cnmd(_jit, 0x8, 0x5, rm, imm) +# define CMPEQI(imm) _cni(_jit, 0x8, 0x8, imm) +# define BT(imm) _cni(_jit, 0x8, 0x9, imm) +# define BF(imm) _cni(_jit, 0x8, 0xb, imm) +# define BTS(imm) _cni(_jit, 0x8, 0xd, imm) +# define BFS(imm) _cni(_jit, 0x8, 0xf, imm) + +# define LDPW(rn, imm) _cni(_jit, 0x9, rn, imm) + +# define BRA(imm) _cd(_jit, 0xa, imm) + +# define BSR(imm) _cd(_jit, 0xb, imm) + +# define GBRSTB(imm) _cni(_jit, 0xc, 0x0, imm) +# define GBRSTW(imm) _cni(_jit, 0xc, 0x1, imm) +# define GBRSTL(imm) _cni(_jit, 0xc, 0x2, imm) +# define GBRLDB(imm) _cni(_jit, 0xc, 0x4, imm) +# define GBRLDW(imm) _cni(_jit, 0xc, 0x5, imm) +# define GBRLDL(imm) _cni(_jit, 0xc, 0x6, imm) +# define MOVA(imm) _cni(_jit, 0xc, 0x7, imm) +# define TSTI(imm) _cni(_jit, 0xc, 0x8, imm) +# define ANDI(imm) _cni(_jit, 0xc, 0x9, imm) +# define XORI(imm) _cni(_jit, 0xc, 0xa, imm) +# define ORI(imm) _cni(_jit, 0xc, 0xb, imm) + +# define LDPL(rn, imm) _cni(_jit, 0xd, rn, imm) + +# define 
MOVI(rn, imm) _cni(_jit, 0xe, rn, imm) + +# define FADD(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x0) +# define FSUB(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x1) +# define FMUL(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x2) +# define FDIV(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x3) +# define FCMPEQ(rn,rm) _cnmd(_jit, 0xf, rn, rm, 0x4) +# define FCMPGT(rn,rm) _cnmd(_jit, 0xf, rn, rm, 0x5) +# define LDXF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x6) +# define STXF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x7) +# define LDF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x8) +# define LDFS(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x9) +# define STF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xa) +# define STFS(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xb) +# define FMOV(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xc) +# define FMAC(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xe) +# define FSTS(rn) _cni(_jit, 0xf, rn, 0x0d) +# define FLDS(rn) _cni(_jit, 0xf, rn, 0x1d) +# define FLOAT(rn) _cni(_jit, 0xf, rn, 0x2d) +# define FTRC(rn) _cni(_jit, 0xf, rn, 0x3d) +# define FNEG(rn) _cni(_jit, 0xf, rn, 0x4d) +# define FABS(rn) _cni(_jit, 0xf, rn, 0x5d) +# define FSQRT(rn) _cni(_jit, 0xf, rn, 0x6d) +# define FLDI0(rn) _cni(_jit, 0xf, rn, 0x8d) +# define FLDI1(rn) _cni(_jit, 0xf, rn, 0x9d) +# define FCNVSD(rn) _cni(_jit, 0xf, rn, 0xad) +# define FCNVDS(rn) _cni(_jit, 0xf, rn, 0xbd) + +# define FMOVXX(rn, rm) FMOV((rn) | 1, (rm) | 1) +# define FMOVDX(rn, rm) FMOV((rn) | 0, (rm) | 1) +# define FMOVXD(rn, rm) FMOV((rn) | 1, (rm) | 0) + +# define CLRT() ii(0x8) +# define NOP() ii(0x9) +# define RTS() ii(0xb) +# define SETT() ii(0x18) +# define DIV0U() ii(0x19) +# define FSCHG() ii(0xf3fd) +# define FRCHG() ii(0xfbfd) + +# define ii(i) *_jit->pc.us++ = i + +# define stack_framesize ((JIT_V_NUM + 2) * 4) + +# define PR_FLAG (1 << 19) +# define SZ_FLAG (1 << 20) +# define FR_FLAG (1 << 21) + +static void _nop(jit_state_t*,jit_word_t); +# define nop(i0) _nop(_jit,i0) +static void _movr(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define movr(r0,r1) _movr(_jit,r0,r1) +static void 
_movi(jit_state_t*,jit_uint16_t,jit_word_t); +# define movi(r0,i0) _movi(_jit,r0,i0) +static void _movnr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_bool_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2,1) +# define movzr(r0,r1,r2) _movnr(_jit,r0,r1,r2,0) +# define casx(r0,r1,r2,r3,i0) _casx(_jit,r0,r1,r2,r3,i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +# define casr(r0,r1,r2,r3) casx(r0,r1,r2,r3,0) +# define casi(r0,i0,r1,r2) casx(r0,_NOREG,r1,r2,i0) +static void _addr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) +static void _addcr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define addcr(r0,r1,r2) _addcr(_jit,r0,r1,r2) +static void _addxr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define addxr(r0,r1,r2) _addxr(_jit,r0,r1,r2) +static void _addi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) +static void _addci(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define addci(r0,r1,i0) _addci(_jit,r0,r1,i0) +static void _addxi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define addxi(r0,r1,i0) _addxi(_jit,r0,r1,i0) +static void _subr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subr(r0,r1,r2) _subr(_jit,r0,r1,r2) +static void _subcr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subcr(r0,r1,r2) _subcr(_jit,r0,r1,r2) +static void _subxr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subxr(r0,r1,r2) _subxr(_jit,r0,r1,r2) +static void _subi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define subi(r0,r1,i0) _subi(_jit,r0,r1,i0) +static void _subci(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define subci(r0,r1,i0) _subci(_jit,r0,r1,i0) +static void _subxi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) +static void 
_rsbi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define rsbi(r0,r1,i0) _rsbi(_jit,r0,r1,i0) +static void _mulr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) +static void _hmulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define hmulr(r0,r1,r2) _hmulr(_jit,r0,r1,r2) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0) +static void _hmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define hmulr_u(r0,r1,r2) _hmulr_u(_jit,r0,r1,r2) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0) +static void _qmulr(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3) +static void _qmulr_u(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3) +static void _muli(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) +static void _qmuli(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_word_t); +# define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0) +static void _qmuli_u(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_word_t); +# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0) +static void _divr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define divr(r0,r1,r2) _divr(_jit,r0,r1,r2) +static void _divr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define divr_u(r0,r1,r2) _divr_u(_jit,r0,r1,r2) +static void _qdivr(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define qdivr(r0,r1,r2,r3) _qdivr(_jit,r0,r1,r2,r3) +static void _qdivr_u(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define qdivr_u(r0,r1,r2,r3) _qdivr_u(_jit,r0,r1,r2,r3) +static void 
_divi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define divi(r0,r1,i0) _divi(_jit,r0,r1,i0) +# define divi_u(r0,r1,i0) fallback_divi_u(r0,r1,i0) +static void _qdivi(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_word_t); +# define qdivi(r0,r1,r2,i0) _qdivi(_jit,r0,r1,r2,i0) +static void _qdivi_u(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_word_t); +# define qdivi_u(r0,r1,r2,i0) _qdivi_u(_jit,r0,r1,r2,i0) +static void _remr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2) +static void _remr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2) +static void _remi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0) +static void _remi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0) +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_uint16_t,jit_uint16_t); +#define extr(r0,r1,i0,i1) fallback_ext(r0,r1,i0,i1) +#define extr_u(r0,r1,i0,i1) fallback_ext_u(r0,r1,i0,i1) +#define depr(r0,r1,i0,i1) fallback_dep(r0,r1,i0,i1) +# define extr_c(r0, r1) EXTSB(r0,r1) +# define extr_s(r0,r1) EXTSW(r0,r1) +# define extr_uc(r0,r1) EXTUB(r0,r1) +# define extr_us(r0,r1) EXTUW(r0,r1) +static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) +static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) +# define lroti(r0,r1,i0) rroti(r0,r1,__WORDSIZE-i0) +static void _andr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define andr(r0,r1,r2) 
_andr(_jit,r0,r1,r2) +static void _andi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0) +static void _orr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define orr(r0,r1,r2) _orr(_jit,r0,r1,r2) +static void _ori(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ori(r0,r1,i0) _ori(_jit,r0,r1,i0) +static void _xorr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define xorr(r0,r1,r2) _xorr(_jit,r0,r1,r2) +static void _xori(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) +# define comr(r0,r1) NOT(r0,r1) +# define negr(r0,r1) NEG(r0,r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clor(r0,r1) _clor(_jit,r0,r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0,r1) _clzr(_jit,r0,r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctor(r0,r1) _ctor(_jit,r0,r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0,r1) _ctzr(_jit,r0,r1) +static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t); +# define rbitr(r0, r1) _rbitr(_jit, r0, r1) +static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t); +# define popcntr(r0, r1) _popcntr(_jit, r0, r1) +static void _gtr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define gtr(r0,r1,r2) _gtr(_jit,r0,r1,r2) +static void _ger(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ger(r0,r1,r2) _ger(_jit,r0,r1,r2) +static void _gtr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define gtr_u(r0,r1,r2) _gtr_u(_jit,r0,r1,r2) +static void _ger_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ger_u(r0,r1,r2) _ger_u(_jit,r0,r1,r2) +# define ltr(r0,r1,r2) gtr(r0,r2,r1) +# define ltr_u(r0,r1,r2) gtr_u(r0,r2,r1) +# define ler(r0,r1,r2) ger(r0,r2,r1) +# define ler_u(r0,r1,r2) ger_u(r0,r2,r1) +static void _eqr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# 
define eqr(r0,r1,r2) _eqr(_jit,r0,r1,r2) +static void _ner(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ner(r0,r1,r2) _ner(_jit,r0,r1,r2) +static void _eqi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define eqi(r0,r1,i0) _eqi(_jit,r0,r1,i0) +static void _nei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define nei(r0,r1,i0) _nei(_jit,r0,r1,i0) +static void _gti(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define gti(r0,r1,i0) _gti(_jit,r0,r1,i0) +static void _gei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define gei(r0,r1,i0) _gei(_jit,r0,r1,i0) +static void _gti_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define gti_u(r0,r1,i0) _gti_u(_jit,r0,r1,i0) +static void _gei_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define gei_u(r0,r1,i0) _gei_u(_jit,r0,r1,i0) +static void _lti(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lti(r0,r1,i0) _lti(_jit,r0,r1,i0) +static void _lei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lei(r0,r1,i0) _lei(_jit,r0,r1,i0) +static void _lti_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lti_u(r0,r1,i0) _lti_u(_jit,r0,r1,i0) +static void _lei_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lei_u(r0,r1,i0) _lei_u(_jit,r0,r1,i0) +static void _lshr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2) +static void _rshr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2) +static void _rshr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2) +static void _lshi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) +static void _rshi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0) +static void _rshi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# 
define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) +# define qlshr(r0,r1,r2,r3) _qlshr(_jit,r0,r1,r2,r3) +static void +_qlshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qlshr_u(r0, r1, r2, r3) _qlshr_u(_jit,r0,r1,r2,r3) +static void +_qlshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0) +# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0) +# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0) +static void +_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# define qrshr(r0, r1, r2, r3) _qrshr(_jit,r0,r1,r2,r3) +static void +_qrshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qrshr_u(r0, r1, r2, r3) _qrshr_u(_jit,r0,r1,r2,r3) +static void +_qrshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0) +# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0) +# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0) +static void +_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# define ldr_c(r0,r1) LDB(r0,r1) +# define ldr_s(r0,r1) LDW(r0,r1) +# define ldr_i(r0,r1) LDL(r0,r1) +static void _ldr_uc(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define ldr_uc(r0,r1) _ldr_uc(_jit,r0,r1) +static void _ldr_us(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define ldr_us(r0,r1) _ldr_us(_jit,r0,r1) +static void _ldi_c(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) +static void _ldi_s(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_s(r0,i0) _ldi_s(_jit,r0,i0) +static void _ldi_i(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_i(r0,i0) _ldi_i(_jit,r0,i0) +static void _ldi_uc(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_uc(r0,i0) _ldi_uc(_jit,r0,i0) +static void _ldi_us(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_us(r0,i0) _ldi_us(_jit,r0,i0) 
+static void _ldxr_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_c(r0,r1,r2) _ldxr_c(_jit,r0,r1,r2) +static void _ldxr_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_s(r0,r1,r2) _ldxr_s(_jit,r0,r1,r2) +static void _ldxr_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_i(r0,r1,r2) _ldxr_i(_jit,r0,r1,r2) +static void _ldxr_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_uc(r0,r1,r2) _ldxr_uc(_jit,r0,r1,r2) +static void _ldxr_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_us(r0,r1,r2) _ldxr_us(_jit,r0,r1,r2) +static void _ldxi_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_c(r0,r1,i0) _ldxi_c(_jit,r0,r1,i0) +static void _ldxi_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_s(r0,r1,i0) _ldxi_s(_jit,r0,r1,i0) +static void _ldxi_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0) +static void _ldxi_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_uc(r0,r1,i0) _ldxi_uc(_jit,r0,r1,i0) +static void _ldxi_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_us(r0,r1,i0) _ldxi_us(_jit,r0,r1,i0) +# define ldxbi_c(r0,r1,i0) generic_ldxbi_c(r0,r1,i0) +# define ldxbi_uc(r0,r1,i0) generic_ldxbi_uc(r0,r1,i0) +# define ldxbi_s(r0,r1,i0) generic_ldxbi_s(r0,r1,i0) +# define ldxbi_us(r0,r1,i0) generic_ldxbi_us(r0,r1,i0) +# define ldxbi_i(r0,r1,i0) generic_ldxbi_i(r0,r1,i0) +static void _ldxai_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_c(r0,r1,i0) _ldxai_c(_jit,r0,r1,i0) +static void _ldxai_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_uc(r0,r1,i0) _ldxai_uc(_jit,r0,r1,i0) +static void _ldxai_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_s(r0,r1,i0) _ldxai_s(_jit,r0,r1,i0) +static void _ldxai_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define 
ldxai_us(r0,r1,i0) _ldxai_us(_jit,r0,r1,i0) +static void _ldxai_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_i(r0,r1,i0) _ldxai_i(_jit,r0,r1,i0) +# define unldr(r0, r1, i0) fallback_unldr(r0, r1, i0) +# define unldi(r0, i0, i1) fallback_unldi(r0, i0, i1) +# define unldr_u(r0, r1, i0) fallback_unldr_u(r0, r1, i0) +# define unldi_u(r0, i0, i1) fallback_unldi_u(r0, i0, i1) +# define str_c(r0,r1) STB(r0,r1) +# define str_s(r0,r1) STW(r0,r1) +# define str_i(r0,r1) STL(r0,r1) +static void _sti_c(jit_state_t*,jit_word_t,jit_uint16_t); +# define sti_c(i0,r0) _sti_c(_jit,i0,r0) +static void _sti_s(jit_state_t*,jit_word_t,jit_uint16_t); +# define sti_s(i0,r0) _sti_s(_jit,i0,r0) +static void _sti_i(jit_state_t*,jit_word_t,jit_uint16_t); +# define sti_i(i0,r0) _sti_i(_jit,i0,r0) +static void _stxr_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define stxr_c(r0,r1,r2) _stxr_c(_jit,r0,r1,r2) +static void _stxr_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define stxr_s(r0,r1,r2) _stxr_s(_jit,r0,r1,r2) +static void _stxr_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define stxr_i(r0,r1,r2) _stxr_i(_jit,r0,r1,r2) +static void _stxi_c(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxi_c(i0,r0,r1) _stxi_c(_jit,i0,r0,r1) +static void _stxi_s(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxi_s(i0,r0,r1) _stxi_s(_jit,i0,r0,r1) +static void _stxi_i(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxi_i(i0,r0,r1) _stxi_i(_jit,i0,r0,r1) +static void _stxbi_c(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxbi_c(i0,r0,r1) _stxbi_c(_jit,i0,r0,r1) +static void _stxbi_s(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxbi_s(i0,r0,r1) _stxbi_s(_jit,i0,r0,r1) +static void _stxbi_i(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxbi_i(i0,r0,r1) _stxbi_i(_jit,i0,r0,r1) +# define stxai_c(i0,r0,r1) generic_stxai_c(i0,r0,r1) +# define 
stxai_s(i0,r0,r1) generic_stxai_s(i0,r0,r1) +# define stxai_i(i0,r0,r1) generic_stxai_i(i0,r0,r1) +# define unstr(r0, r1, i0) fallback_unstr(r0, r1, i0) +# define unsti(i0, r0, i1) fallback_unsti(i0, r0, i1) +static jit_word_t _bger(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define bltr(i0,r0,r1) bltr_p(i0,r0,r1,0) +# define bler(i0,r0,r1) bler_p(i0,r0,r1,0) +# define bgtr(i0,r0,r1) bgtr_p(i0,r0,r1,0) +# define bger(i0,r0,r1) bger_p(i0,r0,r1,0) +# define bltr_p(i0,r0,r1,p) _bger(_jit,i0,r0,r1,0,p) +# define bler_p(i0,r0,r1,p) _bger(_jit,i0,r1,r0,1,p) +# define bgtr_p(i0,r0,r1,p) _bger(_jit,i0,r1,r0,0,p) +# define bger_p(i0,r0,r1,p) _bger(_jit,i0,r0,r1,1,p) +static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define bltr_u(i0,r0,r1) bltr_u_p(i0,r0,r1,0) +# define bler_u(i0,r0,r1) bler_u_p(i0,r0,r1,0) +# define bgtr_u(i0,r0,r1) bgtr_u_p(i0,r0,r1,0) +# define bger_u(i0,r0,r1) bger_u_p(i0,r0,r1,0) +# define bltr_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r0,r1,0,p) +# define bler_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r1,r0,1,p) +# define bgtr_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r1,r0,0,p) +# define bger_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r0,r1,1,p) +static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t); +# define beqr(i0,r0,r1) beqr_p(i0,r0,r1,0) +# define beqr_p(i0,r0,r1,p) _beqr(_jit,i0,r0,r1,p) +static jit_word_t _bner(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t); +# define bner(i0,r0,r1) bner_p(i0,r0,r1,0) +# define bner_p(i0,r0,r1,p) _bner(_jit,i0,r0,r1,p) +static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t); +# define bmsr(i0,r0,r1) bmsr_p(i0,r0,r1,0) +# define bmsr_p(i0,r0,r1,p) _bmsr(_jit,i0,r0,r1,p) +static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t); +# define bmcr(i0,r0,r1) bmcr_p(i0,r0,r1,0) +# define bmcr_p(i0,r0,r1,p) _bmcr(_jit,i0,r0,r1,p) +static jit_word_t 
_boaddr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define boaddr(i0,r0,r1) boaddr_p(i0,r0,r1,0) +# define bxaddr(i0,r0,r1) bxaddr_p(i0,r0,r1,0) +# define boaddr_p(i0,r0,r1,p) _boaddr(_jit,i0,r0,r1,1,p) +# define bxaddr_p(i0,r0,r1,p) _boaddr(_jit,i0,r0,r1,0,p) +static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define boaddr_u(i0,r0,r1) boaddr_u_p(i0,r0,r1,0) +# define bxaddr_u(i0,r0,r1) bxaddr_u_p(i0,r0,r1,0) +# define boaddr_u_p(i0,r0,r1,p) _boaddr_u(_jit,i0,r0,r1,1,p) +# define bxaddr_u_p(i0,r0,r1,p) _boaddr_u(_jit,i0,r0,r1,0,p) +static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define bosubr(i0,r0,r1) bosubr_p(i0,r0,r1,0) +# define bxsubr(i0,r0,r1) bxsubr_p(i0,r0,r1,0) +# define bosubr_p(i0,r0,r1,p) _bosubr(_jit,i0,r0,r1,1,p) +# define bxsubr_p(i0,r0,r1,p) _bosubr(_jit,i0,r0,r1,0,p) +static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define bosubr_u(i0,r0,r1) bosubr_u_p(i0,r0,r1,0) +# define bxsubr_u(i0,r0,r1) bxsubr_u_p(i0,r0,r1,0) +# define bosubr_u_p(i0,r0,r1,p) _bosubr_u(_jit,i0,r0,r1,1,p) +# define bxsubr_u_p(i0,r0,r1,p) _bosubr_u(_jit,i0,r0,r1,0,p) +static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define blei(i0,r0,i1) blei_p(i0,r0,i1,0) +# define bgti(i0,r0,i1) bgti_p(i0,r0,i1,0) +# define blei_p(i0,r0,i1,p) _bgti(_jit,i0,r0,i1,0,p) +# define bgti_p(i0,r0,i1,p) _bgti(_jit,i0,r0,i1,1,p) +static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define blti(i0,r0,i1) blti_p(i0,r0,i1,0) +# define bgei(i0,r0,i1) bgei_p(i0,r0,i1,0) +# define blti_p(i0,r0,i1,p) _bgei(_jit,i0,r0,i1,0,p) +# define bgei_p(i0,r0,i1,p) _bgei(_jit,i0,r0,i1,1,p) +static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define 
blei_u(i0,r0,i1) blei_u_p(i0,r0,i1,0) +# define bgti_u(i0,r0,i1) bgti_u_p(i0,r0,i1,0) +# define blei_u_p(i0,r0,i1,p) _bgti_u(_jit,i0,r0,i1,0,p) +# define bgti_u_p(i0,r0,i1,p) _bgti_u(_jit,i0,r0,i1,1,p) +static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define blti_u(i0,r0,i1) blti_u_p(i0,r0,i1,0) +# define bgei_u(i0,r0,i1) bgei_u_p(i0,r0,i1,0) +# define blti_u_p(i0,r0,i1,p) _bgei_u(_jit,i0,r0,i1,0,p) +# define bgei_u_p(i0,r0,i1,p) _bgei_u(_jit,i0,r0,i1,1,p) +static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define beqi(i0,r0,i1) beqi_p(i0,r0,i1,0) +# define bnei(i0,r0,i1) bnei_p(i0,r0,i1,0) +# define beqi_p(i0,r0,i1,p) _beqi(_jit,i0,r0,i1,1,p) +# define bnei_p(i0,r0,i1,p) _beqi(_jit,i0,r0,i1,0,p) +static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define bmsi(i0,r0,i1) bmsi_p(i0,r0,i1,0) +# define bmci(i0,r0,i1) bmci_p(i0,r0,i1,0) +# define bmsi_p(i0,r0,i1,p) _bmsi(_jit,i0,r0,i1,0,p) +# define bmci_p(i0,r0,i1,p) _bmsi(_jit,i0,r0,i1,1,p) +static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define boaddi(i0,r0,i1) boaddi_p(i0,r0,i1,0) +# define bxaddi(i0,r0,i1) bxaddi_p(i0,r0,i1,0) +# define boaddi_p(i0,r0,i1,p) _boaddi(_jit,i0,r0,i1,1,p) +# define bxaddi_p(i0,r0,i1,p) _boaddi(_jit,i0,r0,i1,0,p) +static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define boaddi_u(i0,r0,i1) boaddi_u_p(i0,r0,i1,0) +# define bxaddi_u(i0,r0,i1) bxaddi_u_p(i0,r0,i1,0) +# define boaddi_u_p(i0,r0,i1,p) _boaddi_u(_jit,i0,r0,i1,1,p) +# define bxaddi_u_p(i0,r0,i1,p) _boaddi_u(_jit,i0,r0,i1,0,p) +static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define bosubi(i0,r0,i1) bosubi_p(i0,r0,i1,0) +# define bxsubi(i0,r0,i1) bxsubi_p(i0,r0,i1,0) +# define bosubi_p(i0,r0,i1,p) 
_bosubi(_jit,i0,r0,i1,1,p) +# define bxsubi_p(i0,r0,i1,p) _bosubi(_jit,i0,r0,i1,0,p) +static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define bosubi_u(i0,r0,i1) bosubi_u_p(i0,r0,i1,0) +# define bxsubi_u(i0,r0,i1) bxsubi_u_p(i0,r0,i1,0) +# define bosubi_u_p(i0,r0,i1,p) _bosubi_u(_jit,i0,r0,i1,1,p) +# define bxsubi_u_p(i0,r0,i1,p) _bosubi_u(_jit,i0,r0,i1,0,p) +static void _jmpr(jit_state_t*,jit_int16_t); +# define jmpr(r0) _jmpr(_jit,r0) +static jit_word_t _jmpi(jit_state_t*,jit_word_t,jit_bool_t); +# define jmpi(i0) _jmpi(_jit,i0,0) +static void _callr(jit_state_t*,jit_int16_t); +# define callr(r0) _callr(_jit,r0) +static void _calli(jit_state_t*,jit_word_t); +# define calli(i0) _calli(_jit,i0) + +static jit_word_t _movi_p(jit_state_t*,jit_uint16_t,jit_word_t); +# define movi_p(r0,i0) _movi_p(_jit,r0,i0) +static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); +# define jmpi_p(i0) _jmpi_p(_jit,i0) +static jit_word_t _calli_p(jit_state_t*,jit_word_t); +# define calli_p(i0) _calli_p(_jit,i0) +static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t); +# define patch_abs(instr,label) _patch_abs(_jit,instr,label) +static void _patch_at(jit_state_t*,jit_word_t,jit_word_t); +# define patch_at(jump,label) _patch_at(_jit,jump,label) +static void _prolog(jit_state_t*,jit_node_t*); +# define prolog(node) _prolog(_jit,node) +static void _epilog(jit_state_t*,jit_node_t*); +# define epilog(node) _epilog(_jit,node) +static void _vastart(jit_state_t*, jit_int32_t); +# define vastart(r0) _vastart(_jit, r0) +static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); +# define vaarg(r0, r1) _vaarg(_jit, r0, r1) + +# define ldr(r0,r1) ldr_i(r0,r1) +# define ldi(r0,i0) ldi_i(r0,i0) +# define ldxr(r0,r1,r2) ldxr_i(r0,r1,r2) +# define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0) +# define str(r0,r1) str_i(r0,r1) +# define sti(i0,r0) sti_i(i0,r0) +# define stxr(r0,r1,r2) stxr_i(r0,r1,r2) +# define stxi(i0,r0,r1) stxi_i(i0,r0,r1) + +# define 
is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0) +# define is_middle_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0) +# define is_high_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) == 0) : 0) +# define masked_bits_count(im) __builtin_popcountl(im) +# define unmasked_bits_count(im) (__WORDSIZE - masked_bits_count(im)) + +# if defined(__SH3__) || defined(__SH4__) || defined(__SH4_NOFPU__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +# define jit_sh34_p() 1 +# else +# define jit_sh34_p() 0 +# endif + +static void _maybe_emit_frchg(jit_state_t *_jit); +# define maybe_emit_frchg() _maybe_emit_frchg(_jit) +static void _maybe_emit_fschg(jit_state_t *_jit); +# define maybe_emit_fschg() _maybe_emit_fschg(_jit) +#endif /* PROTO */ + +#if CODE +static void +_cni(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t n, jit_uint16_t i) +{ + jit_instr_t op; + + op.ni = (struct jit_instr_ni){ .c = c, .n = n, .i = i }; + + ii(op.op); +} + +static void +_cnmd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t n, + jit_uint16_t m, jit_uint16_t d) +{ + jit_instr_t op; + + op.nmd = (struct jit_instr_nmd){ .c = c, .n = n, .m = m, .d = d }; + + ii(op.op); +} + +static void +_cmd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t m, jit_uint16_t d) +{ + jit_instr_t op; + + op.md = (struct jit_instr_md){ .c = c, .m = m, .d = d }; + + ii(op.op); +} + +static void +_cd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t d) +{ + jit_instr_t op; + + op.d = (struct jit_instr_d){ .c = c, .d = d }; + + ii(op.op); +} + +static void +_nop(jit_state_t *_jit, jit_word_t i0) +{ + for (; i0 > 0; i0 -= 2) + NOP(); + assert(i0 == 0); +} + +static void +_movr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (r0 != r1) { + if (r1 == _GBR) + STCGBR(r0); + else if (r0 == _GBR) + LDCGBR(r1); + else + MOV(r0, r1); + } +} + +static void +movi_loop(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + jit_word_t 
tmp; + + if (i0 >= -128 && i0 < 128) { + MOVI(r0, i0); + } else { + tmp = (i0 >> 8) + !!(i0 & 0x80); + if (tmp & 0xff) { + movi_loop(_jit, r0, tmp); + if (tmp != 0) + SHLL8(r0); + } else { + tmp = (i0 >> 16) + !!(i0 & 0x80); + movi_loop(_jit, r0, tmp); + if (tmp != 0) + SHLL16(r0); + } + if (i0 & 0xff) + ADDI(r0, i0 & 0xff); + } +} + +static jit_word_t +movi_loop_cnt(jit_word_t i0) +{ + jit_word_t tmp, cnt = 0; + + if (i0 >= -128 && i0 < 128) { + cnt = 1; + } else { + tmp = (i0 >> 8) + !!(i0 & 0x80); + if (tmp & 0xff) { + cnt += !!tmp + movi_loop_cnt(tmp); + } else { + tmp = (i0 >> 16) + !!(i0 & 0x80); + cnt += !!tmp + movi_loop_cnt(tmp); + } + cnt += !!(i0 & 0xff); + } + + return cnt; +} + +static void +_movi(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + jit_word_t w = _jit->pc.w & ~3; + + if (i0 >= -128 && i0 < 128) { + MOVI(r0, i0); + } else if (!(i0 & 0x1) && i0 >= -256 && i0 < 256) { + MOVI(r0, i0 >> 1); + SHLL(r0); + } else if (!(i0 & 0x3) && i0 >= -512 && i0 < 512) { + MOVI(r0, i0 >> 2); + SHLL2(r0); + } else if (i0 >= w && i0 <= w + 0x3ff && !((i0 - w) & 0x3)) { + MOVA((i0 - w) >> 2); + movr(r0, _R0); + } else if (is_low_mask(i0)) { + MOVI(r0, -1); + rshi_u(r0, r0, unmasked_bits_count(i0)); + } else if (is_high_mask(i0)) { + MOVI(r0, -1); + lshi(r0, r0, unmasked_bits_count(i0)); + } else if (movi_loop_cnt(i0) < 4) { + movi_loop(_jit, r0, i0); + } else { + load_const(0, r0, i0); + } +} + +static void +emit_branch_opcode(jit_state_t *_jit, jit_word_t i0, jit_word_t w, + int t_set, int force_patchable) +{ + jit_int32_t disp = (i0 - w >> 1) - 2; + jit_uint16_t reg; + + if (!force_patchable && i0 == 0) { + /* Positive displacement - we don't know the target yet. */ + if (t_set) + BT(0); + else + BF(0); + + /* Leave space after the BF/BT in case we need to add a + * BRA opcode. 
*/ + w = _jit->code.length - (_jit->pc.uc - _jit->code.ptr); + if (w > 254) { + NOP(); + NOP(); + } + } else if (!force_patchable && disp >= -128) { + if (t_set) + BT(disp); + else + BF(disp); + } else { + reg = jit_get_reg(jit_class_gpr); + + if (force_patchable) + movi_p(rn(reg), i0); + else + movi(rn(reg), i0); + if (t_set) + BF(0); + else + BT(0); + JMP(rn(reg)); + NOP(); + + jit_unget_reg(reg); + } +} + +static void _maybe_emit_frchg(jit_state_t *_jit) +{ + jit_instr_t *instr = (jit_instr_t *)(_jit->pc.w - 2); + + if (_jitc->no_flag && instr->op == 0xfbfd) + _jit->pc.us--; + else + FRCHG(); +} + +static void _maybe_emit_fschg(jit_state_t *_jit) +{ + jit_instr_t *instr = (jit_instr_t *)(_jit->pc.w - 2); + + if (_jitc->no_flag && instr->op == 0xf3fd) + _jit->pc.us--; + else + FSCHG(); +} + +static void maybe_emit_tst(jit_state_t *_jit, jit_uint16_t r0, jit_bool_t *set) +{ + jit_instr_t *instr = (jit_instr_t *)(_jit->pc.w - 2); + + /* If the previous opcode is a MOVT(r0), we can skip the TST opcode, + * but we need to invert the branch condition. 
*/ + if (_jitc->no_flag && instr->op == (0x29 | (r0 << 8))) + *set ^= 1; + else + TST(r0, r0); +} + +static void _movnr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_bool_t set) +{ + maybe_emit_tst(_jit, r2, &set); + + emit_branch_opcode(_jit, 4, 0, set, 0); + movr(r0, r1); +} + +static char atomic_byte; + +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi, addr_reg; + + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + + addr_reg = jit_get_reg(jit_class_gpr); + movi(rn(addr_reg), (uintptr_t)&atomic_byte); + + TAS(rn(addr_reg)); + BF(-3); + + LDL(r0, r1); + CMPEQ(r0, r2); + MOVT(r0); + + BF(0); + STL(r1, r3); + + MOVI(_R0, 0); + STB(rn(addr_reg), _R0); + + jit_unget_reg(addr_reg); + if (iscasi) + jit_unget_reg(r1_reg); +} + +static void +_addr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + ADD(r0, r1); + } else { + movr(r0, r1); + ADD(r0, r2); + } +} + +static void +_addcr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CLRT(); + addxr(r0, r1, r2); +} + +static void +_addxr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + ADDC(r0, r1); + } else { + movr(r0, r1); + ADDC(r0, r2); + } +} + +static void +_addi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 >= -128 && i0 < 127) { + movr(r0, r1); + ADDI(r0, i0); + } else if (r0 != r1) { + movi(r0, i0); + addr(r0, r1, r0); + } else { + assert(r1 != _R0); + + movi(_R0, i0); + addr(r0, r1, _R0); + } +} + +static void +_addci(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + CLRT(); + addxi(r0, r1, i0); +} + +static void +_addxi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + addxr(r0, 
r1, _R0); +} + +static void +_subr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r1 == r2) { + movi(r0, 0); + } else if (r0 == r2) { + NEG(r0, r2); + ADD(r0, r1); + } else { + movr(r0, r1); + SUB(r0, r2); + } +} + +static void +_subcr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CLRT(); + subxr(r0, r1, r2); +} + +static void +_subxr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg; + + if (r0 != r2) { + movr(r0, r1); + SUBC(r0, r2); + } else { + reg = jit_get_reg(jit_class_gpr); + + movr(rn(reg), r0); + movr(r0, r1); + SUBC(r0, rn(reg)); + + jit_unget_reg(reg); + } +} + +static void +_subi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + addi(r0, r1, -i0); +} + +static void +_subci(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + subcr(r0, r1, _R0); +} + +static void +_subxi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + subxr(r0, r1, _R0); +} + +static void +_rsbi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if ((jit_uword_t)((i0 >> 7) + 1) < 2) { + negr(r0, r1); + ADDI(r0, i0); + } else if (r0 != r1) { + assert(r0 != _R0 && r1 != _R0); + + movi(r0, i0); + subr(r0, r0, r1); + } else { + assert(r0 != _R0); + + movi(_R0, i0); + subr(r0, _R0, r1); + } +} + +static void +_mulr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + MULL(r1, r2); + STSL(r0); +} + +static void +_hmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + DMULS(r1, r2); + STSH(r0); +} + +static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + movi(_R0, i0); + hmulr(r0, r1, _R0); +} + +static void +_hmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + DMULU(r1, r2); + STSH(r0); +} + 
+static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + movi(_R0, i0); + hmulr_u(r0, r1, _R0); +} + +static void +_qmulr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + DMULS(r2, r3); + STSL(r0); + STSH(r1); +} + +static void +_qmulr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + DMULU(r2, r3); + STSL(r0); + STSH(r1); +} + +static void +_muli(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + mulr(r0, r1, _R0); +} + +static void +_qmuli(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_word_t i0) +{ + assert(r2 != _R0); + + movi(_R0, i0); + qmulr(r0, r1, r2, _R0); +} + +static void +_qmuli_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_word_t i0) +{ + assert(r2 != _R0); + + movi(_R0, i0); + qmulr_u(r0, r1, r2, _R0); +} + +static void +_divr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg, reg2; + jit_uint16_t divisor; + + assert(r1 != _R0 && r2 != _R0); + + if (r1 == r2) { + MOVI(r0, 1); + } else { + reg = jit_get_reg(jit_class_gpr); + + if (r0 == r2) { + reg2 = jit_get_reg(jit_class_gpr); + movr(rn(reg2), r2); + divisor = rn(reg2); + } else { + divisor = r2; + } + + movr(r0, r1); + MOVI(_R0, 0); + + CMPGT(_R0, r0); + SUBC(rn(reg), rn(reg)); + SUBC(r0, _R0); + + MOVI(_R0, -2); + DIV0S(rn(reg), divisor); + + ROTCL(r0); + DIV1(rn(reg), divisor); + ROTCL(_R0); + XORI(1); + BTS(-6); + TSTI(1); + + ROTCL(r0); + MOVI(_R0, 0); + ADDC(r0, _R0); + + jit_unget_reg(reg); + if (r0 == r2) + jit_unget_reg(reg2); + } +} + +static void +_divr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg, reg2; + jit_uint16_t divisor; + + assert(r1 != _R0 && r2 != _R0); + + if (r1 == r2) { + MOVI(r0, 1); + } else { + reg = jit_get_reg(jit_class_gpr); + + if 
(r0 == r2) { + reg2 = jit_get_reg(jit_class_gpr); + movr(rn(reg2), r2); + divisor = rn(reg2); + } else { + divisor = r2; + } + + movr(r0, r1); + MOVI(rn(reg), 0); + MOVI(_R0, -2); + DIV0U(); + + ROTCL(r0); + DIV1(rn(reg), divisor); + ROTCL(_R0); + XORI(1); + BTS(-6); + TSTI(1); + + ROTCL(r0); + + jit_unget_reg(reg); + if (r0 == r2) + jit_unget_reg(reg2); + } +} + +static void +_qdivr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint32_t reg; + + assert(r2 != _R0 && r3 != _R0); + + if (r0 != r2 && r0 != r3) { + divr(r0, r2, r3); + mulr(_R0, r0, r3); + subr(r1, r2, _R0); + } else { + reg = jit_get_reg(jit_class_gpr); + + divr(rn(reg), r2, r3); + mulr(_R0, rn(reg), r3); + subr(r1, r2, _R0); + movr(r0, rn(reg)); + + jit_unget_reg(reg); + } +} + +static void +_qdivr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint32_t reg; + + assert(r2 != _R0 && r3 != _R0); + + if (r0 != r2 && r0 != r3) { + divr_u(r0, r2, r3); + mulr(_R0, r0, r3); + subr(r1, r2, _R0); + } else { + reg = jit_get_reg(jit_class_gpr); + + divr_u(rn(reg), r2, r3); + mulr(_R0, rn(reg), r3); + subr(r1, r2, _R0); + movr(r0, rn(reg)); + + jit_unget_reg(reg); + } +} + +static void +_divi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + divr(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_qdivi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_word_t i0) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + qdivr(r0, r1, r2, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_qdivi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_word_t i0) +{ + if (r0 != r2 && r1 != r2) { + fallback_divi_u(r0, r2, i0); + muli(r1, r0, i0); + subr(r1, r2, r1); + } else { + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + 
fallback_divi_u(rn(reg), r2, i0); + muli(_R0, rn(reg), i0); + subr(r1, r2, _R0); + + jit_unget_reg(reg); + } +} + +static void +_remr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + assert(r1 != _R0 && r2 != _R0); + + qdivr(rn(reg), r0, r1, r2); + + jit_unget_reg(reg); +} + +static void +_remr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + assert(r1 != _R0 && r2 != _R0); + + qdivr_u(rn(reg), r0, r1, r2); + + jit_unget_reg(reg); +} + +static void +_remi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + remr(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_remi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + qdivi_u(rn(reg), r0, r1, i0); + + jit_unget_reg(reg); +} + +static void +_bswapr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + EXTUW(r0, r1); + SWAPB(r0, r0); +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + SWAPB(r0, r1); + SWAPW(r0, r0); + SWAPB(r0, r0); +} + +static void +_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r0 != _R0 && r1 != _R0); + + movr(_R0, r2); + movr(r0, r1); + + ROTL(r0); + TST(_R0, _R0); + BFS(-4); + ADDI(_R0, -1); + + ROTR(r0); +} + +static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r0 != _R0 && r1 != _R0); + + movr(_R0, r2); + movr(r0, r1); + + ROTR(r0); + TST(_R0, _R0); + BFS(-4); + ADDI(_R0, -1); + + ROTL(r0); +} + +static void +_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + unsigned int i; + + assert(i0 >= 0 && i0 <= __WORDSIZE - 1); + assert(r0 != _R0); + + movr(r0, r1); + + if (i0 < 6) { + for (i = 0; i < i0; i++) + ROTR(r0); 
+ } else if (__WORDSIZE - i0 < 6) { + for (i = 0; i < __WORDSIZE - i0; i++) + ROTL(r0); + } else { + movi(_R0, i0); + rrotr(r0, r0, _R0); + } +} + +static void +_andr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + AND(r0, r1); + } else { + movr(r0, r1); + AND(r0, r2); + } +} + +static void +_andi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 0xff) { + extr_uc(r0, r1); + } else if (i0 == 0xffff) { + extr_us(r0, r1); + } else if (i0 == 0xffff0000) { + SWAPW(r0, r1); + SHLL16(r0); + } else if (r0 != r1) { + movi(r0, i0); + AND(r0, r1); + } else { + assert(r0 != _R0); + + movi(_R0, i0); + AND(r0, _R0); + } +} + +static void +_orr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + OR(r0, r1); + } else { + movr(r0, r1); + OR(r0, r2); + } +} + +static void +_ori(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (r0 != r1) { + movi(r0, i0); + OR(r0, r1); + } else { + assert(r0 != _R0); + + movi(_R0, i0); + OR(r0, _R0); + } +} + +static void +_xorr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + XOR(r0, r1); + } else { + movr(r0, r1); + XOR(r0, r2); + } +} + +static void +_xori(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (r0 == _R0 && !(i0 & ~0xff)) { + movr(r0, r1); + XORI(i0); + } else if (r0 != r1) { + movi(r0, i0); + XOR(r0, r1); + } else { + assert(r0 != _R0); + + movi(_R0, i0); + XOR(r0, _R0); + } +} + +static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + movi(r0, -1); + + SHLL(_R0); + BTS(-3); + ADDI(r0, 1); +} + +static void _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + movi(r0, -1); + + SETT(); + ROTCL(_R0); + BFS(-3); + ADDI(r0, 1); +} + +static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + movi(r0, -1); + + SHLR(_R0); + BTS(-3); + 
ADDI(r0, 1); +} + +static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + movi(r0, -1); + + SETT(); + ROTCR(_R0); + BFS(-3); + ADDI(r0, 1); +} + +static void +_rbitr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + + SETT(); + ROTCR(_R0); + ROTCL(r0); + CMPEQI(1); + emit_branch_opcode(_jit, -6, 0, 0, 0); +} + +static void +_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + assert(r0 != _R0); + + movr(_R0, r1); + movi(r0, 0); + + SHLR(_R0); + NEGC(r0, r0); + TST(_R0, _R0); + BFS(-5); + NEG(r0, r0); +} + +static void +_gtr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPGT(r1, r2); + MOVT(r0); +} + +static void +_gtr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPHI(r1, r2); + MOVT(r0); +} + +static void +_ger(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPGE(r1, r2); + MOVT(r0); +} + +static void +_ger_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPHS(r1, r2); + MOVT(r0); +} + +static void +_eqr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPEQ(r1, r2); + MOVT(r0); +} + +static void +_ner(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r1 != _R0 && r2 != _R0); + + MOVI(_R0, -1); + CMPEQ(r1, r2); + NEGC(r0, _R0); +} + +static void +_eqi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 0) { + TST(r1, r1); + } else if (i0 >= -128 && i0 < 128) { + assert(r1 != _R0); + + movr(_R0, r1); + CMPEQI(i0); + } else { + assert(r1 != _R0); + + movi(_R0, i0); + CMPEQ(r1, _R0); + } + MOVT(r0); +} + +static void +_nei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r0 != _R0 && r1 != _R0); + + if (i0 == 0) { + TST(r1, r1); + } else if (i0 >= -128 && i0 < 128) { + movr(_R0, r1); + CMPEQI(i0); + } else { + movi(_R0, i0); + CMPEQ(r1, _R0); + } + + MOVI(_R0, 
-1); + NEGC(r0, _R0); +} + +static void +_gti(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 0) { + CMPPL(r1); + } else { + assert(r1 != _R0); + + movi(_R0, i0); + CMPGT(r1, _R0); + } + MOVT(r0); +} + +static void +_gei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 0) { + CMPPZ(r1); + } else { + assert(r1 != _R0); + + movi(_R0, i0); + CMPGE(r1, _R0); + } + MOVT(r0); +} + +static void +_gti_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPHI(r1, _R0); + MOVT(r0); +} + +static void +_gei_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPHS(r1, _R0); + MOVT(r0); +} + +static void +_lti(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + if (i0 == 0) { + movr(r0, r1); + ROTCL(r0); + MOVT(r0); + } else { + movi(_R0, i0); + CMPGT(_R0, r1); + MOVT(r0); + } +} + +static void +_lei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPGE(_R0, r1); + MOVT(r0); +} + +static void +_lti_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPHI(_R0, r1); + MOVT(r0); +} + +static void +_lei_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPHS(_R0, r1); + MOVT(r0); +} + +static void +emit_shllr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (jit_sh34_p()) + SHLD(r0, r1); + else { + movr(_R0, r1); + + TST(_R0, _R0); + BTS(2); + DT(_R0); + BFS(-3); + SHLL(r0); + } +} + +static void +_lshr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + assert(r1 != _R0); + + movr(_R0, r2); + movr(r0, r1); + emit_shllr(_jit, r0, _R0); + } else { + movr(r0, r1); + emit_shllr(_jit, r0, r2); + } +} + 
+static void +_rshr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r0 != _R0 && r1 != _R0); + + if (jit_sh34_p()) { + negr(_R0, r2); + movr(r0, r1); + SHAD(r0, _R0); + } else { + movr(_R0, r2); + movr(r0, r1); + + TST(_R0, _R0); + BTS(2); + DT(_R0); + BFS(-3); + SHAR(r0); + } +} + +static void +_rshr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r0 != _R0 && r1 != _R0); + + if (jit_sh34_p()) { + negr(_R0, r2); + movr(r0, r1); + SHLD(r0, _R0); + } else { + movr(_R0, r2); + movr(r0, r1); + + TST(_R0, _R0); + BTS(2); + DT(_R0); + BFS(-3); + SHLR(r0); + } +} + +static void +_lshi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg, mask = 0x00838387; + + movr(r0, r1); + + if (i0 == 0) + return; + + if (i0 == 4) { + SHLL2(r0); + SHLL2(r0); + } else if (mask & (1 << (i0 - 1))) { + if (i0 & 0x10) + SHLL16(r0); + if (i0 & 0x8) + SHLL8(r0); + if (i0 & 0x2) + SHLL2(r0); + if (i0 & 0x1) + SHLL(r0); + } else { + reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + lshr(r0, r0, rn(reg)); + + if (r0 == _R0) + jit_unget_reg(reg); + } +} + +static void +_rshi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg; + + reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr); + + movr(r0, r1); + if (jit_sh34_p()) { + movi(rn(reg), -i0); + SHAD(r0, rn(reg)); + } else { + assert(i0 > 0); + movi(rn(reg), i0); + DT(rn(reg)); + BFS(-3); + SHAR(r0); + } + + if (r0 == _R0) + jit_unget_reg(reg); +} + +static void +_rshi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg, mask = 0x00838387; + + movr(r0, r1); + + if (i0 == 0) + return; + + if (i0 == 4) { + SHLR2(r0); + SHLR2(r0); + } else if (mask & (1 << (i0 - 1))) { + if (i0 & 0x10) + SHLR16(r0); + if (i0 & 0x8) + SHLR8(r0); + if (i0 & 0x2) + SHLR2(r0); + if (i0 & 0x1) + SHLR(r0); + } else { + reg = r0 != _R0 ? 
_R0 : jit_get_reg(jit_class_gpr); + + if (jit_sh34_p()) { + movi(rn(reg), -i0); + SHLD(r0, rn(reg)); + } else { + movi(rn(reg), i0); + DT(rn(reg)); + BFS(-3); + SHLR(r0); + } + + if (r0 == _R0) + jit_unget_reg(reg); + } +} + +static void +_qlshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + assert(r0 != r1); + movr(_R0, r3); + movr(r0, r2); + CMPEQI(32); + movr(r1, r2); + BF(0); + XOR(r0, r0); + SHAD(r0, _R0); + ADDI(_R0, -__WORDSIZE); + SHAD(r1, _R0); +} + +static void +_qlshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + assert(r0 != r1); + movr(_R0, r3); + movr(r0, r2); + CMPEQI(32); + movr(r1, r2); + BF(0); + XOR(r0, r0); + SHLD(r0, _R0); + ADDI(_R0, -__WORDSIZE); + SHLD(r1, _R0); +} + +static void +_xlshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + if (sign) + rshi(r1, r2, __WORDSIZE - 1); + else + movi(r1, 0); + } + else if (i0 == __WORDSIZE) { + movr(r1, r2); + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + if (sign) + rshi(r1, r2, __WORDSIZE - i0); + else + rshi_u(r1, r2, __WORDSIZE - i0); + lshi(r0, r2, i0); + } +} + +static void +_qrshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + assert(r0 != r1); + NEG(_R0, r3); + movr(r1, r2); + CMPEQI(0); + movr(r0, r2); + BF(0); + MOV(r1, _R0); + SHAD(r0, _R0); + ADDI(_R0, __WORDSIZE); + SHAD(r1, _R0); +} + +static void +_qrshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + assert(r0 != r1); + NEG(_R0, r3); + movr(r1, r2); + CMPEQI(0); + movr(r0, r2); + BF(0); + MOV(r1, _R0); + SHLD(r0, _R0); + ADDI(_R0, __WORDSIZE); + SHLD(r1, _R0); +} + +static void +_xrshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + movi(r1, 0); + } + else if (i0 
== __WORDSIZE) {
+        movr(r1, r2);
+        if (sign)
+            rshi(r0, r2, __WORDSIZE - 1);
+        else
+            movi(r0, 0);
+    }
+    else {
+        assert((jit_uword_t)i0 <= __WORDSIZE);
+        lshi(r1, r2, __WORDSIZE - i0);
+        if (sign)
+            rshi(r0, r2, i0);
+        else
+            rshi_u(r0, r2, i0);
+    }
+}
+/* Byte load through r1 (ldr_c) followed by extr_uc on the result. */
+static void _ldr_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+    ldr_c(r0, r1);
+    extr_uc(r0, r0);
+}
+/* Short load through r1 (ldr_s) followed by extr_us on the result. */
+static void _ldr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+    ldr_s(r0, r1);
+    extr_us(r0, r0);
+}
+/*
+ * _ldi_*: load from the constant address i0.  The address is first placed
+ * in _R0 (clobbered), then the matching register-form load is used.
+ */
+static void _ldi_c(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+    movi(_R0, i0);
+    ldr_c(r0, _R0);
+}
+
+static void _ldi_s(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+    movi(_R0, i0);
+    ldr_s(r0, _R0);
+}
+
+static void _ldi_i(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+    movi(_R0, i0);
+    ldr_i(r0, _R0);
+}
+
+static void _ldi_uc(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+    movi(_R0, i0);
+    ldr_uc(r0, _R0);
+}
+
+static void _ldi_us(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+    movi(_R0, i0);
+    ldr_us(r0, _R0);
+}
+/*
+ * _ldxr_c/_s/_i: indexed load.  r2 is copied into _R0 (clobbered) and the
+ * LDRB/LDRW/LDRL form is emitted with r0 and r1, so r1 must not be _R0.
+ */
+static void
+_ldxr_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0);
+
+    movr(_R0, r2);
+    LDRB(r0, r1);
+}
+
+static void
+_ldxr_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0);
+
+    movr(_R0, r2);
+    LDRW(r0, r1);
+}
+
+static void
+_ldxr_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0);
+
+    movr(_R0, r2);
+    LDRL(r0, r1);
+}
+/* Indexed byte load (ldxr_c) followed by extr_uc on the result. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    ldxr_c(r0, r1, r2);
+    extr_uc(r0, r0);
+}
+/* Indexed short load (ldxr_s) followed by extr_us on the result. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    ldxr_s(r0, r1, r2);
+    extr_us(r0, r0);
+}
+/*
+ * Byte load from r1 + i0.  Uses GBRLDB when r1 is _GBR and i0 is in
+ * 0..0xff, LDDB when i0 is in 0..0xf, and otherwise goes through _R0
+ * and ldxr_c.  r1 must not be _R0.
+ */
+static void
+_ldxi_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    assert(r1 != _R0);
+
+    if (r1 == _GBR) {
+        if (i0 >= 0 && i0 <= 0xff) {
+            GBRLDB(i0);
+            movr(r0, _R0); /* result is copied out of _R0 */
+        } else {
+            movr(r0, r1);
+            ldxi_c(r0, r0, i0);
+        }
+    } else if (i0 >= 0 && i0 <= 0xf) {
+        LDDB(r1, i0);
+        movr(r0, _R0);
+    } else {
+        movi(_R0, i0);
+        ldxr_c(r0, r1, _R0);
+    }
+}
+/*
+ * Short load from r1 + i0.  Uses GBRLDW when r1 is _GBR and i0 is even
+ * and in 0..0x1ff, LDDW when i0 is even and in 0..0x1f (both take
+ * i0 >> 1), and otherwise goes through _R0 and ldxr_s.
+ */
+static void
+_ldxi_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    assert(r1 != _R0);
+
+    if (r1 == _GBR) {
+        if (i0 >= 0 && i0 <= 0x1ff && !(i0 & 0x1)) {
+            GBRLDW(i0 >> 1);
+            movr(r0, _R0);
+        } else {
+            movr(r0, r1);
+            ldxi_s(r0, r0, i0);
+        }
+    } else if (i0 >= 0 && i0 <= 0x1f && !(i0 & 0x1)) {
+        LDDW(r1, i0 >> 1);
+        movr(r0, _R0);
+    } else {
+        movi(_R0, i0);
+        ldxr_s(r0, r1, _R0);
+    }
+}
+/*
+ * Word load from r1 + i0.  Uses GBRLDL when r1 is _GBR and i0 is a
+ * multiple of 4 in 0..0x3ff, LDDL when i0 is a multiple of 4 in 0..0x3f
+ * (both take i0 >> 2), and otherwise goes through _R0 and ldxr_i.
+ * Unlike the byte/short forms, LDDL takes r0 directly.
+ */
+static void
+_ldxi_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    assert(r1 != _R0);
+
+    if (r1 == _GBR) {
+        if (i0 >= 0 && i0 <= 0x3ff && !(i0 & 0x3)) {
+            GBRLDL(i0 >> 2);
+            movr(r0, _R0);
+        } else {
+            movr(r0, r1);
+            ldxi_i(r0, r0, i0);
+        }
+    } else if (i0 >= 0 && i0 <= 0x3f && !(i0 & 0x3)) {
+        LDDL(r0, r1, i0 >> 2);
+    } else {
+        movi(_R0, i0);
+        ldxr_i(r0, r1, _R0);
+    }
+}
+/* ldxi_c into _R0, then extr_uc from _R0 into r0; r1 must not be _R0. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    assert(r1 != _R0);
+
+    ldxi_c(_R0, r1, i0);
+    extr_uc(r0, _R0);
+}
+/* ldxi_s into _R0, then extr_us from _R0 into r0; r1 must not be _R0. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    assert(r1 != _R0);
+
+    ldxi_s(_R0, r1, i0);
+    extr_us(r0, _R0);
+}
+/*
+ * _ldxai_*: load with an address increment of i0.  LDBU/LDWU/LDLU is
+ * used when i0 equals the access size (1, 2 or 4); any other increment
+ * falls back to the generic_ldxai_* helper.
+ */
+static void
+_ldxai_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 == 1)
+        LDBU(r0, r1);
+    else
+        generic_ldxai_c(r0, r1, i0);
+}
+
+static void
+_ldxai_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 == 1)
+        LDBU(r0, r1);
+    else
+        generic_ldxai_c(r0, r1, i0);
+    extr_uc(r0, r0);
+}
+
+static void
+_ldxai_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 == 2)
+        LDWU(r0, r1);
+    else
+        generic_ldxai_s(r0, r1, i0);
+}
+
+static void
+_ldxai_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 == 2)
+
LDWU(r0, r1);
+    else
+        generic_ldxai_s(r0, r1, i0);
+    extr_us(r0, r0);
+}
+/* Word load with increment: LDLU when i0 == 4, generic helper otherwise. */
+static void
+_ldxai_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 == 4)
+        LDLU(r0, r1);
+    else
+        generic_ldxai_i(r0, r1, i0);
+}
+/*
+ * _sti_*: store r0 to the constant address i0.  The address is placed in
+ * _R0 (clobbered), so r0 itself must not be _R0.
+ */
+static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+    assert(r0 != _R0);
+
+    movi(_R0, i0);
+    str_c(_R0, r0);
+}
+
+static void _sti_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+    assert(r0 != _R0);
+
+    movi(_R0, i0);
+    str_s(_R0, r0);
+}
+
+static void _sti_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+    assert(r0 != _R0);
+
+    movi(_R0, i0);
+    str_i(_R0, r0);
+}
+/*
+ * _stxr_c/_s/_i: indexed store.  r0 is copied into _R0 (clobbered) and
+ * STRB/STRW/STRL is emitted with r1 and r2, neither of which may be _R0.
+ */
+static void
+_stxr_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0 && r2 != _R0);
+
+    movr(_R0, r0);
+    STRB(r1, r2);
+}
+
+static void
+_stxr_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0 && r2 != _R0);
+
+    movr(_R0, r0);
+    STRW(r1, r2);
+}
+
+static void
+_stxr_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    assert(r1 != _R0 && r2 != _R0);
+
+    movr(_R0, r0);
+    STRL(r1, r2);
+}
+/*
+ * Byte store of r1 to r0 + i0.  With r0 == _GBR: GBRSTB (value passed in
+ * _R0) when i0 is in 0..0xff, otherwise _GBR is copied to a temporary
+ * register and the store is retried.  For any other base the offset goes
+ * through _R0 and stxr_c, so neither r0 nor r1 may be _R0.
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    jit_uint32_t reg;
+
+    if (r0 == _GBR) {
+        if (i0 >= 0 && i0 <= 0xff) {
+            movr(_R0, r1);
+            GBRSTB(i0);
+        } else {
+            reg = jit_get_reg(jit_class_gpr);
+            movr(rn(reg), r0);
+            stxi_c(i0, rn(reg), r1);
+            jit_unget_reg(reg);
+        }
+    } else {
+        assert(r0 != _R0 && r1 != _R0);
+
+        movi(_R0, i0);
+        stxr_c(_R0, r0, r1);
+    }
+}
+/*
+ * Short store of r1 to r0 + i0; same structure as _stxi_c, with GBRSTW
+ * taking i0 >> 1 when i0 is even and in 0..0x1ff.
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    jit_uint32_t reg;
+
+    if (r0 == _GBR) {
+        if (i0 >= 0 && i0 <= 0x1ff && !(i0 & 0x1)) {
+            movr(_R0, r1);
+            GBRSTW(i0 >> 1);
+        } else {
+            reg = jit_get_reg(jit_class_gpr);
+            movr(rn(reg), r0);
+            stxi_s(i0, rn(reg), r1);
+            jit_unget_reg(reg);
+        }
+    } else {
+        assert(r0 != _R0 && r1 != _R0);
+
+        movi(_R0, i0);
+        stxr_s(_R0, r0,
r1);
+    }
+}
+/*
+ * Word store of r1 to r0 + i0.  With r0 == _GBR: GBRSTL (value passed in
+ * _R0) when i0 is a multiple of 4 in 0..0x3ff, otherwise _GBR is copied
+ * to a temporary register and the store is retried.  For other bases,
+ * STDL is used when i0 is a multiple of 4 in 0..0x3f; anything else goes
+ * through _R0 and stxr_i.
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    jit_uint32_t reg;
+
+    if (r0 == _GBR) {
+        if (i0 >= 0 && i0 <= 0x3ff && !(i0 & 0x3)) {
+            movr(_R0, r1);
+            GBRSTL(i0 >> 2);
+        } else {
+            reg = jit_get_reg(jit_class_gpr);
+            movr(rn(reg), r0);
+            stxi_i(i0, rn(reg), r1);
+            jit_unget_reg(reg);
+        }
+    } else if (i0 >= 0 && i0 <= 0x3f && !(i0 & 3)) {
+        STDL(r0, r1, i0 >> 2);
+    } else {
+        assert(r0 != _R0 && r1 != _R0);
+
+        movi(_R0, i0);
+        stxr_i(_R0, r0, r1);
+    }
+}
+/*
+ * _stxbi_*: store with an address adjustment of i0 applied before the
+ * access.  STBU/STWU/STLU is used when i0 is minus the access size
+ * (-1, -2 or -4); any other value uses the generic_stxbi_* helper.
+ */
+static void
+_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (i0 == -1)
+        STBU(r0, r1);
+    else
+        generic_stxbi_c(i0, r0, r1);
+}
+
+static void
+_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (i0 == -2)
+        STWU(r0, r1);
+    else
+        generic_stxbi_s(i0, r0, r1);
+}
+
+static void
+_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (i0 == -4)
+        STLU(r0, r1);
+    else
+        generic_stxbi_i(i0, r0, r1);
+}
+/*
+ * Compare r0 and r1 with CMPGE, then emit a branch to i0.  't' and 'p'
+ * are forwarded to emit_branch_opcode().  Returns the code address
+ * recorded just after the compare, i.e. where the branch starts.
+ */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t t, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    CMPGE(r0, r1);
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, t, p);
+
+    return (w);
+}
+/* Same as _bger, using CMPHS for the comparison. */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_uint16_t r1, jit_bool_t t, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    CMPHS(r0, r1);
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, t, p);
+
+    return (w);
+}
+/*
+ * Branch to i0 when r0 == r1.  When both operands are the same register
+ * the comparison is skipped and an unconditional jump is emitted instead
+ * (jmpi_p if 'p' is set, otherwise _jmpi with force = (i0 == 0)).
+ */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (r0 == r1) {
+        if (p)
+            w = jmpi_p(i0);
+        else
+            w = _jmpi(_jit, i0, i0 == 0);
+    } else {
+        CMPEQ(r0, r1);
+        w = _jit->pc.w;
+        emit_branch_opcode(_jit, i0, w, 1, p);
+    }
+
+    return (w);
+}
+
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{ /* CMPEQ, then a branch emitted with the flag argument 0 (cf. _beqr: 1) */
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    CMPEQ(r0, r1);
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, 0, p);
+
+    return (w);
+}
+/*
+ * Branch to i0 on the result of testing r0 against r1.  TST is emitted
+ * when the registers differ; otherwise maybe_emit_tst() is called with a
+ * pointer to 'set', so it may change the branch sense.  'set' starts
+ * at 0 here.
+ */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+    jit_bool_t set = 0;
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (r0 != r1)
+        TST(r0, r1);
+    else
+        maybe_emit_tst(_jit, r0, &set);
+
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/* Same as _bmsr, with 'set' starting at 1. */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+    jit_bool_t set = 1;
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (r0 != r1)
+        TST(r0, r1);
+    else
+        maybe_emit_tst(_jit, r0, &set);
+
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/*
+ * Compare r0 with the constant i1 and branch to i0.  A zero constant
+ * uses the single-operand CMPPL; otherwise i1 is loaded into _R0
+ * (clobbered, so r0 must not be _R0) and CMPGT is used.  'set' and 'p'
+ * are forwarded to emit_branch_opcode().
+ */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (i1 == 0) {
+        CMPPL(r0);
+    } else {
+        assert(r0 != _R0);
+
+        movi(_R0, i1);
+        CMPGT(r0, _R0);
+    }
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/* Same shape as _bgti, using CMPPZ for a zero constant and CMPGE otherwise. */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (i1 == 0) {
+        CMPPZ(r0);
+    } else {
+        assert(r0 != _R0);
+
+        movi(_R0, i1);
+        CMPGE(r0, _R0);
+    }
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/*
+ * Same shape as _bgti, using maybe_emit_tst() (which may update 'set')
+ * for a zero constant and CMPHI otherwise.
+ */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (i1 == 0) {
+        maybe_emit_tst(_jit, r0, &set);
+    } else {
+        assert(r0 != _R0);
+
+        movi(_R0, i1);
+        CMPHI(r0,
_R0);
+    }
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/*
+ * Load i1 into _R0 (clobbered, so r0 must not be _R0), compare with
+ * CMPHS and branch to i0.  No special case for a zero constant.
+ */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+        jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    assert(r0 != _R0);
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    movi(_R0, i1);
+    CMPHS(r0, _R0);
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/*
+ * Compare r0 with the constant i1 for equality and branch to i0.
+ *   i1 == 0:        maybe_emit_tst(), which may update 'set'
+ *   -128..127:      r0 is copied to _R0 and CMPEQI(i1) is used
+ *   anything else:  i1 is loaded into _R0 and CMPEQ is used
+ *                   (r0 must not be _R0 in this case)
+ */
+static jit_word_t _beqi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                        jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    if (i1 == 0) {
+        maybe_emit_tst(_jit, r0, &set);
+    } else if (i1 >= -128 && i1 < 128) {
+        movr(_R0, r0);
+        CMPEQI(i1);
+    } else {
+        assert(r0 != _R0);
+
+        movi(_R0, i1);
+        CMPEQ(_R0, r0);
+    }
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/* Load the mask i1 into _R0 (clobbered), TST it against r0, branch to i0. */
+static jit_word_t _bmsi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                        jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    assert(r0 != _R0);
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    movi(_R0, i1);
+    TST(_R0, r0);
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/*
+ * Emit ADDV(r0, r1) followed by a branch to i0; 'set' and 'p' are
+ * forwarded to emit_branch_opcode().  Returns the address of the branch.
+ */
+static jit_word_t _boaddr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                          jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    ADDV(r0, r1);
+
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/* Same as _boaddr, emitting CLRT followed by ADDC instead of ADDV. */
+static jit_word_t _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                            jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    CLRT();
+    ADDC(r0, r1);
+
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+/*
+ * Immediate form of _boaddr: i1 is loaded into _R0 (clobbered, so r0 must
+ * not be _R0) after set_fmode() has run, then _boaddr is called.
+ */
+static jit_word_t _boaddi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                          jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    assert(r0
!= _R0);
+
+    movi(_R0, i1);
+    w = _boaddr(_jit, i0, r0, _R0, set, p);
+
+    return (w);
+}
+
+/*
+ * Immediate form of _boaddr_u: i1 is loaded into _R0 (clobbered, so r0
+ * must not be _R0) after set_fmode() has run, then _boaddr_u is called.
+ */
+static jit_word_t _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                            jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    assert(r0 != _R0);
+
+    movi(_R0, i1);
+    w = _boaddr_u(_jit, i0, r0, _R0, set, p);
+
+    return (w);
+}
+
+/*
+ * Subtract r1 from r0 and branch to i0.  The subtraction is emitted as
+ * NEG(_R0, r1) followed by ADDV(r0, _R0), so _R0 is clobbered and r0 must
+ * not be _R0.  'set' and 'p' are forwarded to emit_branch_opcode().
+ * NOTE(review): negating r1 looks like it misreports the condition when
+ * r1 holds the most negative value -- confirm, and consider a direct
+ * subtract-with-overflow instruction if an encoder exists for it.
+ */
+static jit_word_t _bosubr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                          jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    assert(r0 != _R0);
+
+    NEG(_R0, r1);
+    ADDV(r0, _R0);
+
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+
+/* Emit CLRT and SUBC(r0, r1), then a branch to i0. */
+static jit_word_t _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                            jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    CLRT();
+    SUBC(r0, r1);
+
+    w = _jit->pc.w;
+    emit_branch_opcode(_jit, i0, w, set, p);
+
+    return (w);
+}
+
+/*
+ * Immediate form of _bosubr: i1 is loaded into _R0 (clobbered, so r0 must
+ * not be _R0), then _bosubr is called.
+ */
+static jit_word_t _bosubi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                          jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    /* Switch the FPU mode before _R0 is loaded, as _boaddi does: the
+     * set_fmode() call inside _bosubr would otherwise run with the
+     * immediate already sitting in _R0. */
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    assert(r0 != _R0);
+
+    movi(_R0, i1);
+    w = _bosubr(_jit, i0, r0, _R0, set, p);
+
+    return (w);
+}
+
+/*
+ * Immediate form of _bosubr_u: i1 is loaded into _R0 (clobbered, so r0
+ * must not be _R0), then _bosubr_u is called.
+ */
+static jit_word_t _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                            jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+    jit_word_t w;
+
+    /* Same ordering as _boaddi_u: FPU mode first, then the _R0 load. */
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    assert(r0 != _R0);
+
+    movi(_R0, i1);
+    w = _bosubr_u(_jit, i0, r0, _R0, set, p);
+
+    return (w);
+}
+
+/* Jump through register r0: JMP followed by a NOP. */
+static void
+_jmpr(jit_state_t *_jit, jit_int16_t r0)
+{
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+    JMP(r0);
+    NOP();
+}
+
+/*
+ * Jump to the address i0 and return the code address of the jump.
+ * BRA is used when 'force' is set or the displacement fits -2048..2046;
+ * otherwise the address is loaded into a temporary register and jmpr()
+ * is used.  The 'else if (0)' BRAF branch is disabled.
+ */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t force)
+{
+    jit_uint16_t reg;
+    jit_int32_t disp;
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    w = _jit->pc.w;
+    /* Explicit grouping; '-' already binds tighter than '>>'. */
+    disp = ((i0 - w) >> 1) - 2;
+
+    if (force || (disp >= -2048 && disp <= 2046)) {
+        BRA(disp);
+        NOP();
+    } else if (0) {
+        /* TODO: BRAF */
+        reg =
jit_get_reg(jit_class_gpr); + + movi_p(rn(reg), disp - 7); + BRAF(rn(reg)); + NOP(); + + jit_unget_reg(reg); + } else { + reg = jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + jmpr(rn(reg)); + + jit_unget_reg(reg); + } + + return (w); +} + +static void +_callr(jit_state_t *_jit, jit_int16_t r0) +{ + reset_fpu(_jit, r0 == _R0); + + JSR(r0); + NOP(); + + reset_fpu(_jit, 1); +} + +static void +_calli(jit_state_t *_jit, jit_word_t i0) +{ + jit_int32_t disp; + jit_uint16_t reg; + jit_word_t w; + + reset_fpu(_jit, 0); + + w = _jit->pc.w; + disp = (i0 - w >> 1) - 2; + + if (disp >= -2048 && disp <= 2046) { + BSR(disp); + } else { + movi(_R0, i0); + JSR(_R0); + } + + NOP(); + reset_fpu(_jit, 1); +} + +static jit_word_t +_movi_p(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + jit_word_t w = _jit->pc.w; + + load_const(1, r0, 0); + + return (w); +} + +static jit_word_t +_jmpi_p(jit_state_t *_jit, jit_word_t i0) +{ + jit_uint16_t reg; + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + reg = jit_get_reg(jit_class_gpr); + w = movi_p(rn(reg), i0); + jmpr(rn(reg)); + jit_unget_reg(reg); + + return (w); +} + +static jit_word_t +_calli_p(jit_state_t *_jit, jit_word_t i0) +{ + jit_uint16_t reg; + jit_word_t w; + + reset_fpu(_jit, 0); + + reg = jit_get_reg(jit_class_gpr); + w = movi_p(rn(reg), i0); + JSR(rn(reg)); + NOP(); + jit_unget_reg(reg); + + reset_fpu(_jit, 1); + + return (w); +} + +static void +_vastart(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t reg; + + assert(_jitc->function->self.call & jit_call_varargs); + + /* Return jit_va_list_t in the register argument */ + addi(r0, JIT_FP, _jitc->function->vaoff); + reg = jit_get_reg(jit_class_gpr); + + /* Align pointer to 8 bytes with +4 bytes offset (so that the + * double values are aligned to 8 bytes */ + andi(r0, r0, -8); + addi(r0, r0, 4); + + /* Initialize the gpr begin/end pointers */ + addi(rn(reg), r0, sizeof(jit_va_list_t) + + _jitc->function->vagp * sizeof(jit_uint32_t)); + 
stxi(offsetof(jit_va_list_t, bgpr), r0, rn(reg));
+
+    addi(rn(reg), rn(reg), NUM_WORD_ARGS * sizeof(jit_word_t)
+         - _jitc->function->vagp * sizeof(jit_uint32_t));
+    stxi(offsetof(jit_va_list_t, egpr), r0, rn(reg));
+
+    /* Initialize the fpr begin/end pointers */
+    if (_jitc->function->vafp)
+        addi(rn(reg), rn(reg), _jitc->function->vafp * sizeof(jit_float32_t));
+
+    stxi(offsetof(jit_va_list_t, bfpr), r0, rn(reg));
+    addi(rn(reg), rn(reg), NUM_FLOAT_ARGS * sizeof(jit_float32_t)
+         - _jitc->function->vafp * sizeof(jit_float32_t));
+    stxi(offsetof(jit_va_list_t, efpr), r0, rn(reg));
+
+    /* Initialize the stack pointer to the first stack argument */
+    addi(rn(reg), JIT_FP, _jitc->function->self.size);
+    stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
+
+    jit_unget_reg(reg);
+}
+/*
+ * Fetch the next integer variadic argument into r0; r1 holds the address
+ * of the jit_va_list_t.  Two temporary registers and _R0 are used: _R0
+ * keeps the offset of the list field the value was read through (bgpr,
+ * or over once bgpr has reached egpr) so the advanced pointer can be
+ * stored back to the same field.
+ */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t rg0, rg1;
+    jit_word_t ge_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load begin/end gpr pointers */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, egpr));
+    movi(_R0, offsetof(jit_va_list_t, bgpr));
+    ldxr(rn(rg0), r1, _R0);
+
+    /* Check that we didn't reach the end gpr pointer. */
+    CMPHS(rn(rg0), rn(rg1));
+
+    ge_code = _jit->pc.w; /* address of the BF below, patched later */
+    BF(0);
+
+    /* If we did, load the stack pointer instead.
*/
+    movi(_R0, offsetof(jit_va_list_t, over));
+    ldxr(rn(rg0), r1, _R0);
+
+    patch_at(ge_code, _jit->pc.w);
+
+    /* All good, we can now load the actual value */
+    ldxai_i(r0, rn(rg0), sizeof(jit_uint32_t));
+
+    /* Update the pointer (gpr or stack) to the next word */
+    stxr(_R0, r1, rn(rg0));
+
+    jit_unget_reg(rg0);
+    jit_unget_reg(rg1);
+}
+/*
+ * Rewrite the immediate fields of an already emitted constant load at
+ * 'instr': the four bytes of 'label', most significant first, go into
+ * the ni.i field of instructions 0, 2, 4 and 6 of the sequence.
+ */
+static void
+_patch_abs(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_instr_t *ptr = (jit_instr_t *)instr;
+
+    ptr[0].ni.i = (label >> 24) & 0xff;
+    ptr[2].ni.i = (label >> 16) & 0xff;
+    ptr[4].ni.i = (label >> 8) & 0xff;
+    ptr[6].ni.i = (label >> 0) & 0xff;
+}
+/*
+ * Patch the instruction at 'instr' so that it refers to 'label'.  The
+ * action is selected by the nmd.c field of the instruction:
+ *   0xe  sequence handled by patch_abs()
+ *   0xc  8-bit displacement from the 4-byte aligned 'instr', in words
+ *   0xa  12-bit displacement (range -2048..2046)
+ *   0x8  conditional branch (ni.n of 0x9/0xb/0xd/0xf) with an 8-bit
+ *        displacement; when out of range the condition is inverted and
+ *        the following instruction slot is rewritten as a 0xa branch
+ *   0xd  constant load, see the TODO below
+ */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_instr_t *ptr = (jit_instr_t *)instr;
+    jit_int32_t disp;
+
+    switch (ptr->nmd.c) {
+    case 0xe:
+        patch_abs(instr, label);
+        break;
+    case 0xc:
+        disp = ((label - (instr & ~0x3)) >> 2) - 1;
+        assert(disp >= 0 && disp <= 255);
+        ptr->ni.i = disp;
+        break;
+    case 0xa:
+        disp = ((label - instr) >> 1) - 2;
+        assert(disp >= -2048 && disp <= 2046);
+        ptr->d.d = disp;
+        break;
+    case 0x8:
+        switch (ptr->ni.n) {
+        case 0x9:
+        case 0xb:
+        case 0xd:
+        case 0xf:
+            disp = ((label - instr) >> 1) - 2;
+            if (disp >= -128 && disp <= 127) {
+                ptr->ni.i = disp;
+            } else {
+                /* Invert bit 1: BT(S) <-> BF(S) */
+                ptr->ni.n ^= 1 << 1;
+
+                /* Opcode 2 is now a BRA opcode */
+                ptr[1].d = (struct jit_instr_d){ .c = 0xa, .d = disp - 1 };
+            }
+            break;
+        default:
+            assert(!"unhandled branch opcode");
+        }
+        break;
+    case 0xd:
+        if (ptr->op & 0xff) {
+            /* TODO: Fix the mess. patch_at() gets called with 'instr' pointing
+             * to the mov.l opcode and 'label' being the value that should be
+             * loaded into the register. So we read the address at which the mov.l
+             * points to, and write the label there.
*/
+            *(jit_uint32_t *)((instr & ~0x3) + 4 + (ptr->op & 0xff) * 4) = label;
+        } else {
+            disp = ((label - instr) >> 2) - 1 + !!(instr & 0x3);
+            ptr->op = (ptr->op & 0xff00) | disp;
+        }
+        break;
+    default:
+        /* The string literal must be negated: a bare literal is always
+         * true, so the assertion could never fire. */
+        assert(!"unhandled branch opcode");
+    }
+}
+
+/*
+ * Emit the function prologue: reserve stack_framesize bytes, save JIT_FP,
+ * the value read by STSPR and every JIT_V register marked in the
+ * function's regset, set JIT_FP to JIT_SP, then reserve the local stack
+ * area.  For varargs functions the remaining argument registers are also
+ * spilled after the jit_va_list_t.  Nothing is emitted when the function
+ * uses assume_frame.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_uint16_t reg, regno;
+
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+        jit_int32_t frame = -_jitc->function->frame;
+        assert(_jitc->function->self.aoff >= frame);
+        if (_jitc->function->assume_frame)
+            return;
+        _jitc->function->self.aoff = frame;
+    }
+
+    if (_jitc->function->allocar)
+        _jitc->function->self.aoff &= -8;
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                               /* align stack at 8 bytes */
+                               _jitc->function->self.aoff) + 7) & -8;
+
+    ADDI(JIT_SP, -stack_framesize);
+    STDL(JIT_SP, JIT_FP, JIT_V_NUM + 1);
+
+    /* Saved in slot JIT_V_NUM; _epilog reloads it with LDSPR. */
+    STSPR(_R0);
+    STDL(JIT_SP, _R0, JIT_V_NUM);
+
+    for (regno = 0; regno < JIT_V_NUM; regno++)
+        if (jit_regset_tstbit(&_jitc->function->regset, JIT_V(regno)))
+            STDL(JIT_SP, JIT_V(regno), regno);
+
+    movr(JIT_FP, JIT_SP);
+
+    if (_jitc->function->stack)
+        subi(JIT_SP, JIT_SP, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), _jitc->function->self.aoff);
+        stxi_i(_jitc->function->aoffoff, JIT_FP, rn(reg));
+        jit_unget_reg(reg);
+    }
+
+    if (_jitc->function->self.call & jit_call_varargs) {
+        /* Align to 8 bytes with +4 bytes offset (so that the double
+         * values are aligned to 8 bytes) */
+        andi(JIT_R0, JIT_FP, -8);
+        addi(JIT_R0, JIT_R0, 4);
+
+        for (regno = _jitc->function->vagp; jit_arg_reg_p(regno); regno++) {
+            stxi(_jitc->function->vaoff
+                 + sizeof(jit_va_list_t)
+                 + regno * sizeof(jit_word_t),
+                 JIT_R0, rn(_R4 + regno));
+        }
+
+        for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); regno++) {
+            stxi_f(_jitc->function->vaoff
+                   + sizeof(jit_va_list_t)
+                   + NUM_WORD_ARGS * sizeof(jit_word_t)
+                   + regno * sizeof(jit_float32_t),
+                   JIT_R0,
rn(_F4 + (regno ^ fpr_args_inverted())));
+        }
+    }
+
+    reset_fpu(_jit, 0);
+}
+/*
+ * Emit the function epilogue, mirroring _prolog: restore JIT_SP from
+ * JIT_FP, reload the JIT_V registers marked in the function's regset,
+ * reload slot JIT_V_NUM through JIT_FP into LDSPR, reload JIT_FP from
+ * slot JIT_V_NUM + 1 and return.  Nothing is emitted when the function
+ * uses assume_frame.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    unsigned int i;
+
+    if (_jitc->function->assume_frame)
+        return;
+
+    reset_fpu(_jit, 1);
+
+    movr(JIT_SP, JIT_FP);
+
+    for (i = JIT_V_NUM; i > 0; i--)
+        if (jit_regset_tstbit(&_jitc->function->regset, JIT_V(i - 1)))
+            LDDL(JIT_V(i - 1), JIT_SP, i - 1);
+
+    LDDL(JIT_FP, JIT_SP, JIT_V_NUM);
+    LDSPR(JIT_FP);
+
+    LDDL(JIT_FP, JIT_SP, JIT_V_NUM + 1);
+    RTS();
+    ADDI(JIT_SP, stack_framesize); /* presumably runs in the RTS delay slot -- confirm */
+}
+#endif /* CODE */
diff --git a/lib/jit_sh-fpu.c b/lib/jit_sh-fpu.c
new file mode 100644
index 0000000..e440a64
--- /dev/null
+++ b/lib/jit_sh-fpu.c
@@ -0,0 +1,2394 @@
+/*
+ * Copyright (C) 2022 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ * + * Authors: + * Paul Cercueil + */ + +#if PROTO +static void set_fmode(jit_state_t *_jit, jit_bool_t is_double); +static void set_fmode_no_r0(jit_state_t *_jit, jit_bool_t is_double); +static void reset_fpu(jit_state_t *_jit, jit_bool_t no_r0); + +static void _extr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_bool_t); +# define extr_f(r0,r1) _extr_f(_jit,r0,r1,0) +# define extr_d(r0,r1) _extr_f(_jit,r0,r1,1) +static void _truncr_f_i(jit_state_t*,jit_int16_t,jit_int16_t,jit_bool_t); +# define truncr_f_i(r0,r1) _truncr_f_i(_jit,r0,r1,0) +# define truncr_d_i(r0,r1) _truncr_f_i(_jit,r0,r1,1) +static void _fmar_f(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fmar_f(r0, r1, r2, r3) _fmar_f(_jit, r0, r1, r2, r3) +static void _fmar_d(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fmar_d(r0, r1, r2, r3) _fmar_d(_jit, r0, r1, r2, r3) +static void _fmsr_f(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fmsr_f(r0, r1, r2, r3) _fmsr_f(_jit, r0, r1, r2, r3) +static void _fmsr_d(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fmsr_d(r0, r1, r2, r3) _fmsr_d(_jit, r0, r1, r2, r3) +static void _fnmar_f(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fnmar_f(r0, r1, r2, r3) _fnmar_f(_jit, r0, r1, r2, r3) +static void _fnmar_d(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fnmar_d(r0, r1, r2, r3) _fnmar_d(_jit, r0, r1, r2, r3) +static void _fnmsr_f(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fnmsr_f(r0, r1, r2, r3) _fnmsr_f(_jit, r0, r1, r2, r3) +static void _fnmsr_d(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fnmsr_d(r0, r1, r2, r3) _fnmsr_d(_jit, r0, r1, r2, r3) +static void _movr_f(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define movr_f(r0,r1) _movr_f(_jit,r0,r1) +static void 
_movr_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define movr_d(r0,r1) _movr_d(_jit,r0,r1) +static void _movi_f(jit_state_t*,jit_uint16_t,jit_float32_t); +# define movi_f(r0,i0) _movi_f(_jit,r0,i0) +static void _movi_d(jit_state_t*,jit_uint16_t,jit_float64_t); +# define movi_d(r0,i0) _movi_d(_jit,r0,i0) +static void _ltr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ltr_f(r0,r1,r2) _ltr_f(_jit,r0,r1,r2,0) +# define ltr_d(r0,r1,r2) _ltr_f(_jit,r0,r1,r2,1) +static void _lti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define lti_f(r0,r1,i0) _lti_f(_jit,r0,r1,i0) +static void _lti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define lti_d(r0,r1,i0) _lti_d(_jit,r0,r1,i0) +static void _ler_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ler_f(r0,r1,r2) _ler_f(_jit,r0,r1,r2,0) +# define ler_d(r0,r1,r2) _ler_f(_jit,r0,r1,r2,1) +static void _lei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define lei_f(r0,r1,i0) _lei_f(_jit,r0,r1,i0) +static void _lei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define lei_d(r0,r1,i0) _lei_d(_jit,r0,r1,i0) +static void _eqr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define eqr_f(r0,r1,r2) _eqr_f(_jit,r0,r1,r2,0) +# define eqr_d(r0,r1,r2) _eqr_f(_jit,r0,r1,r2,1) +static void _eqi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define eqi_f(r0,r1,i0) _eqi_f(_jit,r0,r1,i0) +static void _eqi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define eqi_d(r0,r1,i0) _eqi_d(_jit,r0,r1,i0) +# define ger_f(r0,r1,r2) ler_f(r0,r2,r1) +# define ger_d(r0,r1,r2) ler_d(r0,r2,r1) +static void _gei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define gei_f(r0,r1,i0) _gei_f(_jit,r0,r1,i0) +static void _gei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define gei_d(r0,r1,i0) _gei_d(_jit,r0,r1,i0) +# define gtr_f(r0,r1,r2) ltr_f(r0,r2,r1) +# define gtr_d(r0,r1,r2) ltr_d(r0,r2,r1) +static 
void _gti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define gti_f(r0,r1,i0) _gti_f(_jit,r0,r1,i0) +static void _gti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define gti_d(r0,r1,i0) _gti_d(_jit,r0,r1,i0) +static void _ner_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ner_f(r0,r1,r2) _ner_f(_jit,r0,r1,r2,0) +# define ner_d(r0,r1,r2) _ner_f(_jit,r0,r1,r2,1) +static void _nei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define nei_f(r0,r1,i0) _nei_f(_jit,r0,r1,i0) +static void _nei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define nei_d(r0,r1,i0) _nei_d(_jit,r0,r1,i0) +static void _unltr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define unltr_f(r0,r1,r2) _unltr_f(_jit,r0,r1,r2,0) +# define unltr_d(r0,r1,r2) _unltr_f(_jit,r0,r1,r2,1) +static void _unlti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define unlti_f(r0,r1,i0) _unlti_f(_jit,r0,r1,i0) +static void _unlti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define unlti_d(r0,r1,i0) _unlti_d(_jit,r0,r1,i0) +static void _unler_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t, + jit_bool_t); +# define unler_f(r0,r1,r2) _unler_f(_jit,r0,r1,r2,0) +# define unler_d(r0,r1,r2) _unler_f(_jit,r0,r1,r2,1) +static void _unlei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define unlei_f(r0,r1,i0) _unlei_f(_jit,r0,r1,i0) +static void _unlei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define unlei_d(r0,r1,i0) _unlei_d(_jit,r0,r1,i0) +# define ungtr_f(r0,r1,r2) unltr_f(r0,r2,r1) +# define ungtr_d(r0,r1,r2) unltr_d(r0,r2,r1) +static void _ungti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define ungti_f(r0,r1,i0) _ungti_f(_jit,r0,r1,i0) +static void _ungti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define ungti_d(r0,r1,i0) _ungti_d(_jit,r0,r1,i0) +# define unger_f(r0,r1,r2) _unler_f(_jit,r0,r2,r1,0) +# define unger_d(r0,r1,r2) 
_unler_f(_jit,r0,r2,r1,1) +static void _ungei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define ungei_f(r0,r1,i0) _ungei_f(_jit,r0,r1,i0) +static void _ungei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define ungei_d(r0,r1,i0) _ungei_d(_jit,r0,r1,i0) +static void _uneqr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t, + jit_bool_t); +# define uneqr_f(r0,r1,r2) _uneqr_f(_jit,r0,r1,r2,0) +# define uneqr_d(r0,r1,r2) _uneqr_f(_jit,r0,r1,r2,1) +static void _uneqi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define uneqi_f(r0,r1,i0) _uneqi_f(_jit,r0,r1,i0) +static void _uneqi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define uneqi_d(r0,r1,i0) _uneqi_d(_jit,r0,r1,i0) +static void _ltgtr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ltgtr_f(r0,r1,r2) _ltgtr_f(_jit,r0,r1,r2,0) +# define ltgtr_d(r0,r1,r2) _ltgtr_f(_jit,r0,r1,r2,1) +static void _ltgti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define ltgti_f(r0,r1,i0) _ltgti_f(_jit,r0,r1,i0) +static void _ltgti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define ltgti_d(r0,r1,i0) _ltgti_d(_jit,r0,r1,i0) +static void _ordr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ordr_f(r0,r1,r2) _ordr_f(_jit,r0,r1,r2,0) +# define ordr_d(r0,r1,r2) _ordr_f(_jit,r0,r1,r2,1) +static void _ordi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define ordi_f(r0,r1,i0) _ordi_f(_jit,r0,r1,i0) +static void _ordi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define ordi_d(r0,r1,i0) _ordi_d(_jit,r0,r1,i0) +static void _unordr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define unordr_f(r0,r1,r2) _unordr_f(_jit,r0,r1,r2,0) +# define unordr_d(r0,r1,r2) _unordr_f(_jit,r0,r1,r2,1) +static void _unordi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define unordi_f(r0,r1,i0) _unordi_f(_jit,r0,r1,i0) +static void 
_unordi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define unordi_d(r0,r1,i0) _unordi_d(_jit,r0,r1,i0) +static void _addr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_bool_t); +# define addr_f(r0,r1,r2) _addr_f(_jit,r0,r1,r2,0) +# define addr_d(r0,r1,r2) _addr_f(_jit,r0,r1,r2,1) +static void _addi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) +static void _addi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t); +# define addi_d(r0,r1,i0) _addi_d(_jit,r0,r1,i0) +static void _subr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subr_f(r0,r1,r2) _subr_f(_jit,r0,r1,r2) +static void _subr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subr_d(r0,r1,r2) _subr_d(_jit,r0,r1,r2) +static void _subi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define subi_f(r0,r1,i0) _subi_f(_jit,r0,r1,i0) +static void _subi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t); +# define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) +static void _negr_f(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define negr_f(r0,r1) _negr_f(_jit,r0,r1) +static void _negr_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define negr_d(r0,r1) _negr_d(_jit,r0,r1) +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +static void _rsbi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +static void _rsbi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t); +# define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _mulr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define mulr_f(r0,r1,r2) _mulr_f(_jit,r0,r1,r2) +static void _muli_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) +static void _mulr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define mulr_d(r0,r1,r2) _mulr_d(_jit,r0,r1,r2) +static 
void _muli_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t);
+# define muli_d(r0,r1,i0) _muli_d(_jit,r0,r1,i0)
+static void _divr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define divr_f(r0,r1,r2) _divr_f(_jit,r0,r1,r2)
+static void _divi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); /* immediate form: i0 is loaded into a scratch FPR, then divr_f is used */
+# define divi_f(r0,r1,i0) _divi_f(_jit,r0,r1,i0)
+static void _divr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define divr_d(r0,r1,r2) _divr_d(_jit,r0,r1,r2)
+static void _divi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t);
+# define divi_d(r0,r1,i0) _divi_d(_jit,r0,r1,i0)
+static void _movr_w_f(jit_state_t*,jit_uint16_t,jit_int16_t); /* NOTE(review): presumably a raw bit move from a GPR to an FPR; definition not in view */
+#define movr_w_f(r0,r1) _movr_w_f(_jit,r0,r1)
+static void _movr_f_w(jit_state_t*,jit_uint16_t,jit_int16_t); /* NOTE(review): presumably the reverse direction; definition not in view */
+#define movr_f_w(r0,r1) _movr_f_w(_jit,r0,r1)
+static void _movi_w_f(jit_state_t*,jit_int16_t,jit_word_t); /* used by _movi_d to place each 32-bit half of a double constant */
+# define movi_w_f(r0,i0) _movi_w_f(_jit,r0,i0)
+static void _movr_ww_d(jit_state_t*,jit_uint16_t,jit_int16_t, jit_int16_t);
+# define movr_ww_d(r0,r1,r2) _movr_ww_d(_jit,r0,r1,r2)
+static void _movr_d_ww(jit_state_t*,jit_uint16_t,jit_int16_t, jit_int16_t);
+# define movr_d_ww(r0,r1,r2) _movr_d_ww(_jit,r0,r1,r2)
+static void _movi_ww_d(jit_state_t*,jit_int16_t,jit_word_t, jit_word_t);
+# define movi_ww_d(r0,i0,i1) _movi_ww_d(_jit,r0,i0,i1)
+static void _absr_f(jit_state_t*,jit_uint16_t,jit_uint16_t); /* copies r1 to r0, then emits FABS on r0 */
+# define absr_f(r0,r1) _absr_f(_jit,r0,r1)
+static void _absr_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+# define absr_d(r0,r1) _absr_d(_jit,r0,r1)
+static void _sqrtr_f(jit_state_t*,jit_uint16_t,jit_uint16_t); /* copies r1 to r0, then emits FSQRT on r0 */
+# define sqrtr_f(r0,r1) _sqrtr_f(_jit,r0,r1)
+static void _sqrtr_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+# define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1)
+static void _extr_d_f(jit_state_t*,jit_uint16_t,jit_uint16_t); /* plain movr_f when SH_SINGLE_ONLY is set */
+# define extr_d_f(r0,r1) _extr_d_f(_jit,r0,r1)
+static void _extr_f_d(jit_state_t*,jit_uint16_t,jit_uint16_t); /* plain movr_f when SH_SINGLE_ONLY is set */
+# define extr_f_d(r0,r1) _extr_f_d(_jit,r0,r1)
+# define ldr_f(r0,r1) LDF(r0,r1) /* single-precision load maps straight onto the LDF emitter macro */
+static void _ldr_d(jit_state_t*,jit_uint16_t,jit_uint16_t); /* loads the pair r0+1 / r0 through R0; falls back to ldr_f when SH_SINGLE_ONLY */
+# define ldr_d(r0,r1) _ldr_d(_jit,r0,r1)
+static void _ldi_f(jit_state_t*,jit_uint16_t,jit_word_t); /* absolute address form: the address is first loaded into R0 */
+# define ldi_f(r0,i0) _ldi_f(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_uint16_t,jit_word_t);
+# define ldi_d(r0,i0) _ldi_d(_jit,r0,i0)
+static void _ldxr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* indexed form: r2 is copied to R0 before LDXF */
+# define ldxr_f(r0,r1,r2) _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define ldxr_d(r0,r1,r2) _ldxr_d(_jit,r0,r1,r2)
+static void _ldxi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxi_f(r0,r1,i0) _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+# define ldxi_d(r0,r1,i0) _ldxi_d(_jit,r0,r1,i0)
+# define unldr_x(r0,r1,i0) fallback_unldr_x(r0,r1,i0) /* unaligned loads are delegated to the fallback_* helpers */
+# define unldi_x(r0,i0,i1) fallback_unldi_x(r0,i0,i1)
+# define str_f(r0,r1) STF(r0,r1) /* single-precision store maps straight onto the STF emitter macro */
+static void _str_d(jit_state_t*,jit_uint16_t,jit_uint16_t); /* stores r1+1 at the base address and r1 at offset 4; str_f when SH_SINGLE_ONLY */
+# define str_d(r0,r1) _str_d(_jit,r0,r1)
+static void _sti_f(jit_state_t*,jit_word_t,jit_uint16_t);
+# define sti_f(i0,r0) _sti_f(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_uint16_t);
+# define sti_d(i0,r0) _sti_d(_jit,i0,r0)
+static void _stxr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); /* indexed form: r0 is copied to R0 before STXF */
+# define stxr_f(r0,r1,r2) _stxr_f(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+# define stxr_d(r0,r1,r2) _stxr_d(_jit,r0,r1,r2)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+# define stxi_f(i0,r0,r1) _stxi_f(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+# define stxi_d(i0,r0,r1) _stxi_d(_jit,i0,r0,r1)
+# define unstr_x(r0,r1,i0) fallback_unstr_x(r0,r1,i0) /* unaligned stores are delegated to the fallback_* helpers */
+# define unsti_x(i0,r0,i1) fallback_unsti_x(i0,r0,i1)
+static jit_word_t _beqr_f(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t,
+
jit_bool_t,jit_bool_t,jit_bool_t); +# define beqr_f(i0,r0,r1) beqr_f_p(i0,r0,r1,0) +# define bner_f(i0,r0,r1) bner_f_p(i0,r0,r1,0) +# define beqr_d(i0,r0,r1) beqr_f_p(i0,r0,r1,0) +# define bner_d(i0,r0,r1) bner_f_p(i0,r0,r1,0) +# define beqr_f_p(i0,r0,r1,p) _beqr_f(_jit,i0,r0,r1,0,1,p) +# define bner_f_p(i0,r0,r1,p) _beqr_f(_jit,i0,r0,r1,0,0,p) +# define beqr_d_p(i0,r0,r1,p) _beqr_f(_jit,i0,r0,r1,1,1,p) +# define bner_d_p(i0,r0,r1,p) _beqr_f(_jit,i0,r0,r1,1,0,p) +static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_uint16_t, + jit_float32_t,jit_bool_t,jit_bool_t); +# define beqi_f(i0,r0,i1) beqi_f_p(i0,r0,i1,0) +# define bnei_f(i0,r0,i1) bnei_f_p(i0,r0,i1,0) +# define beqi_f_p(i0,r0,i1,p) _beqi_f(_jit,i0,r0,i1,1,p) +# define bnei_f_p(i0,r0,i1,p) _beqi_f(_jit,i0,r0,i1,0,p) +static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_uint16_t, + jit_float64_t,jit_bool_t,jit_bool_t); +# define beqi_d(i0,r0,i1) beqi_d_p(i0,r0,i1,0) +# define bnei_d(i0,r0,i1) bnei_d_p(i0,r0,i1,0) +# define beqi_d_p(i0,r0,i1,p) _beqi_d(_jit,i0,r0,i1,1,p) +# define bnei_d_p(i0,r0,i1,p) _beqi_d(_jit,i0,r0,i1,0,p) +static jit_word_t +_blti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define blti_f(i0,r0,i1) blti_f_p(i0,r0,i1,0) +# define blti_f_p(i0,r0,i1,p) _blti_f(_jit,i0,r0,i1,p) +static jit_word_t +_blti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define blti_d(i0,r0,i1) blti_d_p(i0,r0,i1,0) +# define blti_d_p(i0,r0,i1,p) _blti_d(_jit,i0,r0,i1,p) +static jit_word_t _bgtr_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t, + jit_bool_t,jit_bool_t,jit_bool_t); +# define bgtr_f(i0,r0,r1) bgtr_f_p(i0,r0,r1,0) +# define bgtr_d(i0,r0,r1) bgtr_d_p(i0,r0,r1,0) +# define bltr_f(i0,r0,r1) bltr_f_p(i0,r1,r0,0) +# define bltr_d(i0,r0,r1) bltr_d_p(i0,r1,r0,0) +# define bgtr_f_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r0,r1,0,1,p) +# define bgtr_d_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r0,r1,1,1,p) +# define bltr_f_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r1,r0,0,1,p) +# define 
bltr_d_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r1,r0,1,1,p)
+static jit_word_t
+_bgti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define bgti_f(i0,r0,i1) bgti_f_p(i0,r0,i1,0)
+# define bgti_f_p(i0,r0,i1,p) _bgti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bgti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bgti_d(i0,r0,i1) bgti_d_p(i0,r0,i1,0)
+# define bgti_d_p(i0,r0,i1,p) _bgti_d(_jit,i0,r0,i1,p)
+/*
+ * _bler_f takes trailing arguments (is_double, set, p).  It serves the
+ * ordered "less or equal" branches with set=0 and the unordered
+ * "greater than" / "less than" branches with set=1.
+ */
+static jit_word_t _bler_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t,
+ jit_bool_t,jit_bool_t,jit_bool_t);
+# define bler_f(i0,r0,r1) bler_f_p(i0,r0,r1,0)
+# define bler_d(i0,r0,r1) bler_d_p(i0,r0,r1,0)
+# define bler_f_p(i0,r0,r1,p) _bler_f(_jit,i0,r0,r1,0,0,p)
+# define bler_d_p(i0,r0,r1,p) _bler_f(_jit,i0,r0,r1,1,0,p)
+static jit_word_t
+_blei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define blei_f(i0,r0,i1) blei_f_p(i0,r0,i1,0)
+# define blei_f_p(i0,r0,i1,p) _blei_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_blei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define blei_d(i0,r0,i1) blei_d_p(i0,r0,i1,0)
+# define blei_d_p(i0,r0,i1,p) _blei_d(_jit,i0,r0,i1,p)
+/*
+ * "Greater or equal" is "less or equal" with the operands exchanged.
+ * The exchange happens once, in bger_*_p; the plain wrappers forward
+ * their operands unchanged (a second swap would turn bger into bler).
+ */
+# define bger_f(i0,r0,r1) bger_f_p(i0,r0,r1,0)
+# define bger_d(i0,r0,r1) bger_d_p(i0,r0,r1,0)
+# define bger_f_p(i0,r0,r1,p) bler_f_p(i0,r1,r0,p)
+# define bger_d_p(i0,r0,r1,p) bler_d_p(i0,r1,r0,p)
+static jit_word_t
+_bgei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define bgei_f(i0,r0,i1) bgei_f_p(i0,r0,i1,0)
+# define bgei_f_p(i0,r0,i1,p) _bgei_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bgei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bgei_d(i0,r0,i1) bgei_d_p(i0,r0,i1,0)
+# define bgei_d_p(i0,r0,i1,p) _bgei_d(_jit,i0,r0,i1,p)
+/*
+ * Unordered "less than" reuses _bler_f with set=1 and exchanged
+ * operands.  As above, only the _p macros perform the exchange; the
+ * plain wrappers pass r0/r1 through unchanged.
+ */
+# define bunltr_f(i0,r0,r1) bunltr_f_p(i0,r0,r1,0)
+# define bunltr_d(i0,r0,r1) bunltr_d_p(i0,r0,r1,0)
+# define bunltr_f_p(i0,r0,r1,p) _bler_f(_jit,i0,r1,r0,0,1,p)
+# define bunltr_d_p(i0,r0,r1,p) _bler_f(_jit,i0,r1,r0,1,1,p)
+static jit_word_t
+_bunlti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define bunlti_f(i0,r0,i1) bunlti_f_p(i0,r0,i1,0)
+# define bunlti_f_p(i0,r0,i1,p) _bunlti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bunlti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bunlti_d(i0,r0,i1) bunlti_d_p(i0,r0,i1,0)
+# define bunlti_d_p(i0,r0,i1,p) _bunlti_d(_jit,i0,r0,i1,p)
+/* Unordered "less or equal": _bgtr_f with set=0, operands in natural order. */
+# define bunler_f(i0,r0,r1) bunler_f_p(i0,r0,r1,0)
+# define bunler_d(i0,r0,r1) bunler_d_p(i0,r0,r1,0)
+# define bunler_f_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r0,r1,0,0,p)
+# define bunler_d_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r0,r1,1,0,p)
+static jit_word_t
+_bunlei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define bunlei_f(i0,r0,i1) bunlei_f_p(i0,r0,i1,0)
+# define bunlei_f_p(i0,r0,i1,p) _bunlei_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bunlei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bunlei_d(i0,r0,i1) bunlei_d_p(i0,r0,i1,0)
+# define bunlei_d_p(i0,r0,i1,p) _bunlei_d(_jit,i0,r0,i1,p)
+/* Unordered "greater than": _bler_f with set=1, operands in natural order. */
+# define bungtr_f(i0,r0,r1) bungtr_f_p(i0,r0,r1,0)
+# define bungtr_d(i0,r0,r1) bungtr_d_p(i0,r0,r1,0)
+# define bungtr_f_p(i0,r0,r1,p) _bler_f(_jit,i0,r0,r1,0,1,p)
+# define bungtr_d_p(i0,r0,r1,p) _bler_f(_jit,i0,r0,r1,1,1,p)
+static jit_word_t
+_bungti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define bungti_f(i0,r0,i1) bungti_f_p(i0,r0,i1,0)
+# define bungti_f_p(i0,r0,i1,p) _bungti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bungti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bungti_d(i0,r0,i1) bungti_d_p(i0,r0,i1,0)
+# define bungti_d_p(i0,r0,i1,p) _bungti_d(_jit,i0,r0,i1,p)
+/*
+ * Unordered "greater or equal" is unordered "less or equal" with the
+ * operands exchanged.  The exchange is done once, in bunger_*_p; the
+ * plain wrappers forward r0/r1 unchanged, because a second swap would
+ * make bunger_* expand to exactly the same call as bunler_*.
+ */
+# define bunger_f(i0,r0,r1) bunger_f_p(i0,r0,r1,0)
+# define bunger_d(i0,r0,r1) bunger_d_p(i0,r0,r1,0)
+# define bunger_f_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r1,r0,0,0,p)
+# define bunger_d_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r1,r0,1,0,p)
+static jit_word_t
+_bungei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define
bungei_f(i0,r0,i1) bungei_f_p(i0,r0,i1,0)
+# define bungei_f_p(i0,r0,i1,p) _bungei_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bungei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bungei_d(i0,r0,i1) bungei_d_p(i0,r0,i1,0)
+# define bungei_d_p(i0,r0,i1,p) _bungei_d(_jit,i0,r0,i1,p)
+static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int16_t,
+ jit_int16_t,jit_bool_t,jit_bool_t); /* the macros below pass 0/1 for the _f/_d forms, then p, as the two trailing arguments */
+# define buneqr_f(i0,r0,r1) buneqr_f_p(i0,r1,r0,0) /* NOTE(review): operands are swapped here and again in buneqr_f_p; harmless only if the predicate is symmetric - confirm */
+# define buneqr_d(i0,r0,r1) buneqr_d_p(i0,r1,r0,0)
+# define buneqr_f_p(i0,r0,r1,p) _buneqr_f(_jit,i0,r1,r0,0,p)
+# define buneqr_d_p(i0,r0,r1,p) _buneqr_f(_jit,i0,r1,r0,1,p)
+static jit_word_t
+_buneqi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define buneqi_f(i0,r0,i1) buneqi_f_p(i0,r0,i1,0)
+# define buneqi_f_p(i0,r0,i1,p) _buneqi_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_buneqi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define buneqi_d(i0,r0,i1) buneqi_d_p(i0,r0,i1,0)
+# define buneqi_d_p(i0,r0,i1,p) _buneqi_d(_jit,i0,r0,i1,p)
+static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int16_t,
+ jit_int16_t,jit_bool_t,jit_bool_t); /* the macros below pass 0/1 for the _f/_d forms, then p, as the two trailing arguments */
+# define bltgtr_f(i0,r0,r1) bltgtr_f_p(i0,r1,r0,0) /* NOTE(review): same double operand swap as buneqr_f above */
+# define bltgtr_d(i0,r0,r1) bltgtr_d_p(i0,r1,r0,0)
+# define bltgtr_f_p(i0,r0,r1,p) _bltgtr_f(_jit,i0,r1,r0,0,p)
+# define bltgtr_d_p(i0,r0,r1,p) _bltgtr_f(_jit,i0,r1,r0,1,p)
+static jit_word_t
+_bltgti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define bltgti_f(i0,r0,i1) bltgti_f_p(i0,r0,i1,0)
+# define bltgti_f_p(i0,r0,i1,p) _bltgti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bltgti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bltgti_d(i0,r0,i1) bltgti_d_p(i0,r0,i1,0)
+# define bltgti_d_p(i0,r0,i1,p) _bltgti_d(_jit,i0,r0,i1,p)
+static jit_word_t _bordr_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t,
+ jit_bool_t,jit_bool_t,jit_bool_t); /* one emitter for the bordr and bunordr families; three trailing flags */
+# define bordr_f(i0,r0,r1) bordr_f_p(i0,r0,r1,0)
+# define bordr_d(i0,r0,r1)
bordr_d_p(i0,r0,r1,0)
+# define bordr_f_p(i0,r0,r1,p) _bordr_f(_jit,i0,r0,r1,0,1,p) /* bordr passes 1 as the second flag ... */
+# define bordr_d_p(i0,r0,r1,p) _bordr_f(_jit,i0,r0,r1,1,1,p)
+static jit_word_t
+_bordi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define bordi_f(i0,r0,i1) bordi_f_p(i0,r0,i1,0)
+# define bordi_f_p(i0,r0,i1,p) _bordi_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bordi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bordi_d(i0,r0,i1) bordi_d_p(i0,r0,i1,0)
+# define bordi_d_p(i0,r0,i1,p) _bordi_d(_jit,i0,r0,i1,p)
+# define bunordr_f(i0,r0,r1) bunordr_f_p(i0,r0,r1,0)
+# define bunordr_d(i0,r0,r1) bunordr_d_p(i0,r0,r1,0)
+# define bunordr_f_p(i0,r0,r1,p) _bordr_f(_jit,i0,r0,r1,0,0,p) /* ... and bunordr passes 0 to the same emitter */
+# define bunordr_d_p(i0,r0,r1,p) _bordr_f(_jit,i0,r0,r1,1,0,p)
+static jit_word_t
+_bunordi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+# define bunordi_f(i0,r0,i1) bunordi_f_p(i0,r0,i1,0)
+# define bunordi_f_p(i0,r0,i1,p) _bunordi_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bunordi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+# define bunordi_d(i0,r0,i1) bunordi_d_p(i0,r0,i1,0)
+# define bunordi_d_p(i0,r0,i1,p) _bunordi_d(_jit,i0,r0,i1,p)
+# define ldxbi_f(r0,r1,i0) generic_ldxbi_f(r0,r1,i0) /* the ldxbi forms are delegated to the generic_* helpers */
+# define ldxbi_d(r0,r1,i0) generic_ldxbi_d(r0,r1,i0)
+static void
+_ldxai_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_word_t); /* the ldxai forms have a backend-specific emitter */
+# define ldxai_f(r0,r1,i0) _ldxai_f(_jit,r0,r1,i0)
+static void
+_ldxai_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_word_t);
+# define ldxai_d(r0,r1,i0) _ldxai_d(_jit,r0,r1,i0)
+static void
+_stxbi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t); /* the stxbi forms have a backend-specific emitter */
+# define stxbi_f(i0,r0,r1) _stxbi_f(_jit,i0,r0,r1)
+static void
+_stxbi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t);
+# define stxbi_d(i0,r0,r1) _stxbi_d(_jit,i0,r0,r1)
+# define stxai_f(i0,r0,r1) generic_stxai_f(i0,r0,r1) /* the stxai forms are delegated to the generic_* helpers */
+# define stxai_d(i0,r0,r1) generic_stxai_d(i0,r0,r1)
+static void _vaarg_d(jit_state_t*,jit_int32_t,jit_int32_t);
+# define
vaarg_d(r0, r1) _vaarg_d(_jit, r0, r1) +#endif /* PROTO */ + +#if CODE +static void set_fmode_mask(jit_state_t *_jit, jit_uint32_t mask, jit_bool_t no_r0) +{ + jit_uint16_t reg, reg2; + + if (SH_HAS_FPU && _jitc->uses_fpu) { + if (no_r0) { + reg = jit_get_reg(jit_class_gpr); + reg2 = jit_get_reg(jit_class_gpr); + + movi(rn(reg2), mask); + STSFP(rn(reg)); + xorr(rn(reg), rn(reg), rn(reg2)); + LDSFP(rn(reg)); + + jit_unget_reg(reg); + jit_unget_reg(reg2); + } else { + STSFP(_R0); + SWAPW(_R0, _R0); + XORI(mask >> 16); + SWAPW(_R0, _R0); + LDSFP(_R0); + } + } +} + +static void set_fmode(jit_state_t *_jit, jit_bool_t is_double) +{ + if (SH_HAS_FPU && !SH_SINGLE_ONLY && _jitc->uses_fpu && _jitc->mode_d != is_double) { + set_fmode_mask(_jit, PR_FLAG, 0); + _jitc->mode_d = is_double; + } +} + +static void reset_fpu(jit_state_t *_jit, jit_bool_t no_r0) +{ + if (SH_HAS_FPU && _jitc->uses_fpu) { + if (_jitc->mode_d != SH_DEFAULT_FPU_MODE) + set_fmode_mask(_jit, PR_FLAG | FR_FLAG, no_r0); + else if (SH_DEFAULT_FPU_MODE) + set_fmode_mask(_jit, FR_FLAG, no_r0); + else + maybe_emit_frchg(); + + _jitc->mode_d = SH_DEFAULT_FPU_MODE; + } +} + +static void set_fmode_no_r0(jit_state_t *_jit, jit_bool_t is_double) +{ + if (SH_HAS_FPU && _jitc->uses_fpu && !SH_SINGLE_ONLY && _jitc->mode_d != is_double) { + set_fmode_mask(_jit, PR_FLAG, 1); + _jitc->mode_d = is_double; + } +} + +static void _extr_f(jit_state_t *_jit, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double) +{ + set_fmode(_jit, is_double); + + LDS(r1); + FLOAT(r0); +} + +static void _truncr_f_i(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, + jit_bool_t is_double) +{ + set_fmode(_jit, is_double); + + FTRC(r1); + STSUL(r0); +} + +static void _fmar_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint16_t reg; + + set_fmode(_jit, 0); + + reg = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk); + + if (reg == JIT_NOREG) { + reg = 
jit_get_reg(jit_class_fpr);
+        mulr_f(rn(reg), r1, r2);
+        addr_f(r0, rn(reg), r3);
+    } else if (r0 == r2) {
+        movr_f(rn(reg), r2);
+        movr_f(r0, r3);
+        FMAC(r0, r1);
+    } else {
+        movr_f(rn(reg), r1);
+        movr_f(r0, r3);
+        FMAC(r0, r2);
+    }
+
+    jit_unget_reg(reg);
+}
+/*
+ * Double-precision multiply-add, r0 = r1 * r2 + r3, built from mulr_d
+ * and addr_d.  When r0 aliases r3 the product goes through a scratch
+ * FPR so that r3 is still intact for the addition.
+ */
+static void _fmar_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+    jit_uint16_t reg;
+
+    if (r0 == r3) {
+        reg = jit_get_reg(jit_class_fpr);
+
+        mulr_d(rn(reg), r1, r2);
+        addr_d(r0, rn(reg), r3);
+
+        jit_unget_reg(reg);
+    } else {
+        mulr_d(r0, r1, r2);
+        addr_d(r0, r0, r3);
+    }
+}
+/*
+ * Single-precision multiply-subtract, r0 = r1 * r2 - r3.
+ *
+ * If the named register _F0 cannot be obtained, falls back to mulr_f
+ * followed by subr_f through a scratch FPR.  Otherwise one factor is
+ * copied into the _F0 temporary, r0 is loaded with r3 and negated, and
+ * FMAC is emitted with the other factor.
+ * NOTE(review): FMAC presumably computes r0 += FR0 * operand - confirm.
+ * NOTE(review): when r0 == r1 == r2 the second factor is overwritten by
+ * the movr_f(r0, r3) before FMAC reads it - confirm whether callers can
+ * pass that combination.
+ */
+static void _fmsr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+    jit_uint16_t reg;
+
+    set_fmode(_jit, 0);
+
+    reg = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk);
+
+    if (reg == JIT_NOREG) {
+        reg = jit_get_reg(jit_class_fpr);
+        mulr_f(rn(reg), r1, r2);
+        subr_f(r0, rn(reg), r3);
+    } else if (r0 == r2) {
+        movr_f(rn(reg), r2);
+        movr_f(r0, r3);
+        FNEG(r0);
+        FMAC(r0, r1);
+    } else {
+        movr_f(rn(reg), r1);
+        movr_f(r0, r3);
+        FNEG(r0);
+        FMAC(r0, r2);
+    }
+
+    jit_unget_reg(reg);
+}
+/*
+ * Double-precision multiply-subtract, r0 = r1 * r2 - r3, built from
+ * mulr_d and subr_d; uses a scratch FPR when r0 aliases r3.
+ */
+static void _fmsr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+    jit_uint16_t reg;
+
+    if (r0 == r3) {
+        reg = jit_get_reg(jit_class_fpr);
+
+        mulr_d(rn(reg), r1, r2);
+        subr_d(r0, rn(reg), r3);
+
+        jit_unget_reg(reg);
+    } else {
+        mulr_d(r0, r1, r2);
+        subr_d(r0, r0, r3);
+    }
+}
+/*
+ * Single-precision negated multiply-subtract.  Without _F0 this is
+ * fmsr_f followed by negr_f; with _F0 one factor is copied into the
+ * temporary and negated there, r0 is loaded with r3, and FMAC is
+ * emitted with the other factor.
+ */
+static void _fnmsr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+    jit_uint16_t reg;
+
+    set_fmode(_jit, 0);
+
+    reg = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk);
+
+    if (reg == JIT_NOREG) {
+        fmsr_f(r0, r1, r2, r3);
+        negr_f(r0, r0);
+    } else {
+        if (r0 == r2) {
+            movr_f(rn(reg), r2);
+            FNEG(rn(reg));
+            movr_f(r0, r3);
+            FMAC(r0, r1);
+        } else {
+            movr_f(rn(reg), r1);
+            FNEG(rn(reg));
+            movr_f(r0, r3);
+            FMAC(r0, r2);
+        }
+
+
jit_unget_reg(reg);
+    }
+}
+
+/* Double-precision negated multiply-subtract: fmsr_d followed by negr_d. */
+static void _fnmsr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+    fmsr_d(r0, r1, r2, r3);
+    negr_d(r0, r0);
+}
+
+/* Single-precision negated multiply-add: fmar_f followed by negr_f. */
+static void _fnmar_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+    fmar_f(r0, r1, r2, r3);
+    negr_f(r0, r0);
+}
+
+/* Double-precision negated multiply-add: fmar_d followed by negr_d. */
+static void _fnmar_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+ jit_uint16_t r2, jit_uint16_t r3)
+{
+    fmar_d(r0, r1, r2, r3);
+    negr_d(r0, r0);
+}
+
+/*
+ * Single-precision register move, r0 = r1; emits nothing when both are
+ * the same register.  Register numbers >= _XF0 name the other register
+ * bank: those moves are done in single mode and go through the
+ * FLDS/FSTS pair, with FRCHG emissions around the accesses to the
+ * banked register.
+ */
+static void _movr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (r0 != r1) {
+        if (r0 >= _XF0 || r1 >= _XF0) {
+            set_fmode(_jit, 0);
+
+            if (r0 >= _XF0 && r1 >= _XF0) {
+                maybe_emit_frchg();
+                FMOV(r0 - _XF0, r1 - _XF0);
+                FRCHG();
+            } else if (r0 >= _XF0) {
+                FLDS(r1);
+                FRCHG();
+                FSTS(r0 - _XF0);
+                FRCHG();
+            } else {
+                maybe_emit_frchg();
+                FLDS(r1 - _XF0);
+                FRCHG();
+                FSTS(r0);
+            }
+        } else {
+            FMOV(r0, r1);
+        }
+    }
+}
+
+/*
+ * Double-precision register move, r0 = r1.  Delegates to movr_f on
+ * single-only configurations.  Moves involving a banked register
+ * (>= _XF0) use the FMOVXX/FMOVXD/FMOVDX emitters between
+ * maybe_emit_fschg() and FSCHG(); otherwise the two halves are moved
+ * with a pair of FMOVs.
+ */
+static void _movr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+    if (r0 != r1) {
+        if (SH_SINGLE_ONLY) {
+            movr_f(r0, r1);
+        } else if (r0 >= _XF0 || r1 >= _XF0) {
+            set_fmode(_jit, 0);
+            maybe_emit_fschg();
+
+            if (r0 >= _XF0 && r1 >= _XF0)
+                FMOVXX(r0 - _XF0, r1 - _XF0);
+            else if (r0 >= _XF0)
+                FMOVXD(r0 - _XF0, r1);
+            else
+                FMOVDX(r0, r1 - _XF0);
+
+            FSCHG();
+        } else {
+            FMOV(r0, r1);
+            FMOV(r0 + 1, r1 + 1);
+        }
+    }
+}
+
+/*
+ * Load the single-precision constant i0 into r0.
+ *
+ * +-0.0 and +-1.0 are synthesised with FLDI0/FLDI1 (plus FNEG for the
+ * negative values); everything else goes through load_const_f().  A
+ * banked destination (>= _XF0) is handled by emitting the sequence
+ * between maybe_emit_frchg() and FRCHG().
+ *
+ * -0.0f compares equal to 0.0f, so the sign of a zero constant has to
+ * be read from its bit pattern; testing `i0 == -0.0f` after
+ * `i0 == 0.0f` can never be reached and would load +0.0 for -0.0.
+ */
+static void _movi_f(jit_state_t *_jit, jit_uint16_t r0, jit_float32_t i0)
+{
+    union {
+        jit_float32_t f;
+        jit_uint32_t w;
+    } bits;
+    jit_bool_t is_bank = r0 >= _XF0;
+    jit_bool_t is_negative;
+
+    bits.f = i0;
+    is_negative = (bits.w >> 31) != 0;
+
+    set_fmode(_jit, 0);
+
+    if (is_bank) {
+        maybe_emit_frchg();
+        r0 -= _XF0;
+    }
+
+    if (i0 == 0.0f) {
+        FLDI0(r0);
+        if (is_negative) {
+            /* negative zero: flip the sign of the freshly loaded +0.0 */
+            FNEG(r0);
+        }
+    } else if (i0 == 1.0f) {
+        FLDI1(r0);
+    } else if (i0 == -1.0f) {
+        FLDI1(r0);
+        FNEG(r0);
+    } else {
+        load_const_f(0, r0, i0);
+    }
+
+    if (is_bank)
+        FRCHG();
+}
+
+static void _movi_d(jit_state_t *_jit, jit_uint16_t r0, jit_float64_t i0)
+{
+    union fl64 {
+        struct {
+            jit_uint32_t hi;
+
jit_uint32_t lo;
+        };
+        jit_float64_t f;
+    };
+    /* NOTE(review): the (union fl64) casts below use the GNU C cast-to-union extension */
+    if (SH_SINGLE_ONLY) {
+        movi_f(r0, (jit_float32_t)i0);
+    } else if (r0 >= _XF0) {
+        set_fmode(_jit, 0);
+        maybe_emit_frchg();
+
+        movi_w_f(r0 + 1 - _XF0, ((union fl64)i0).hi);
+        movi_w_f(r0 - _XF0, ((union fl64)i0).lo);
+
+        FRCHG();
+    } else {
+        movi_w_f(r0 + 1, ((union fl64)i0).hi);
+        movi_w_f(r0, ((union fl64)i0).lo);
+    }
+}
+/*
+ * r0 = (r1 < r2) in the precision selected by is_double: emits
+ * FCMPGT(r2, r1) and copies the resulting flag into r0 with MOVT.
+ */
+static void _ltr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1,
+ jit_int16_t r2, jit_bool_t is_double)
+{
+    set_fmode(_jit, is_double);
+
+    FCMPGT(r2, r1);
+    MOVT(r0);
+}
+/* Immediate form of ltr_f: i0 is materialised in a scratch FPR first. */
+static void
+_lti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t reg;
+
+    reg = jit_get_reg(jit_class_fpr);
+    movi_f(rn(reg), i0);
+
+    ltr_f(r0, r1, rn(reg));
+
+    jit_unget_reg(reg);
+}
+/* Immediate form of ltr_d: i0 is materialised in a scratch FPR first. */
+static void
+_lti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t reg;
+
+    reg = jit_get_reg(jit_class_fpr);
+    movi_d(rn(reg), i0);
+
+    ltr_d(r0, r1, rn(reg));
+
+    jit_unget_reg(reg);
+}
+/*
+ * "Less or equal" comparison of r1 and r2; the result is built in R0
+ * and copied to r0 at the end.  A scratch FPR is compared against both
+ * operands in a second code path.
+ *
+ * NOTE(review): the BF/BT/BRA operands are hard-coded displacements
+ * into this very sequence.  Do not add, remove or reorder emitted
+ * instructions without recomputing them.  The `+ is_double` /
+ * `- is_double` terms presumably account for movr_w_f emitting one more
+ * instruction than FLDI0 - confirm.
+ */
+static void _ler_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1,
+ jit_int16_t r2, jit_bool_t is_double)
+{
+    jit_uint16_t reg;
+
+    reg = jit_get_reg(jit_class_fpr);
+
+    set_fmode(_jit, is_double);
+
+    MOVI(_R0, 0);
+    FCMPEQ(r1, r1);
+    BF(5);
+    FCMPEQ(r2, r2);
+    BF(3);
+
+    FCMPGT(r1, r2);
+    MOVT(_R0);
+    BRA(13 + is_double);
+    XORI(1);
+
+    if (is_double)
+        movr_w_f(rn(reg), _R0);
+    else
+        FLDI0(rn(reg));
+    FCMPGT(rn(reg), r1);
+    MOVT(_R0);
+    FCMPGT(r1, rn(reg));
+    ROTL(_R0);
+    TST(_R0, _R0);
+    BT(5);
+
+    FCMPGT(rn(reg), r2);
+    MOVT(_R0);
+    FCMPGT(r2, rn(reg));
+    ROTL(_R0);
+    TST(_R0, _R0);
+    BF(-18 - is_double);
+
+    movr(r0, _R0);
+
+    jit_unget_reg(reg);
+}
+/* Immediate form of ler_f: i0 is materialised in a scratch FPR first. */
+static void
+_lei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t reg = jit_get_reg(jit_class_fpr);
+
+    movi_f(rn(reg), i0);
+    ler_f(r0, r1, rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+static void
+_lei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1,
jit_float64_t i0)
+{
+    jit_uint16_t reg = jit_get_reg(jit_class_fpr);
+
+    movi_d(rn(reg), i0);
+    ler_d(r0, r1, rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = (r1 == r2) in the precision selected by is_double: FCMPEQ, then
+ * the flag is copied into r0 with MOVT.
+ */
+static void
+_eqr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1,
+       jit_int16_t r2, jit_bool_t is_double)
+{
+    set_fmode(_jit, is_double);
+    FCMPEQ(r1, r2);
+    MOVT(r0);
+}
+
+/*
+ * The *i_f / *i_d helpers below all follow one pattern: grab a scratch
+ * FPR, load the immediate into it, emit the register-register form and
+ * release the scratch register.
+ */
+static void
+_eqi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    eqr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_eqi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    eqr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_gei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    ger_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_gei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    ger_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_gti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    gtr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_gti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    gtr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = (r1 != r2): the equality result is produced in R0, inverted
+ * with XORI(1) and then copied to r0.
+ */
+static void
+_ner_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+       jit_bool_t is_double)
+{
+    _eqr_f(_jit, _R0, r1, r2, is_double);
+    XORI(1);
+    movr(r0, _R0);
+}
+
+static void
+_nei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t reg = jit_get_reg(jit_class_fpr);
+
+
movi_f(rn(reg), i0);
+    ner_f(r0, r1, rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+/* Immediate form of ner_d: i0 is loaded into a scratch FPR first. */
+static void
+_nei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    ner_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Unordered "less than": the "less or equal" test with exchanged
+ * operands is produced in R0, inverted with XORI(1) and copied to r0.
+ */
+static void
+_unltr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+         jit_bool_t is_double)
+{
+    _ler_f(_jit, _R0, r2, r1, is_double);
+    XORI(1);
+    movr(r0, _R0);
+}
+
+/* Immediate form of unltr_f. */
+static void
+_unlti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    unltr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of unltr_d. */
+static void
+_unlti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    unltr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Unordered "less or equal": the "less than" test with exchanged
+ * operands is produced in R0, inverted with XORI(1) and copied to r0.
+ */
+static void
+_unler_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+         jit_bool_t is_double)
+{
+    _ltr_f(_jit, _R0, r2, r1, is_double);
+    XORI(1);
+    movr(r0, _R0);
+}
+
+/* Immediate form of unler_f. */
+static void
+_unlei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    unler_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of unler_d. */
+static void
+_unlei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    unler_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of ungtr_f. */
+static void
+_ungti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    ungtr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_ungti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t reg = jit_get_reg(jit_class_fpr);
+
+    movi_d(rn(reg), i0);
+    ungtr_d(r0, r1,
rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+/* Immediate form of unger_f: i0 is loaded into a scratch FPR first. */
+static void
+_ungei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    unger_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of unger_d. */
+static void
+_ungei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    unger_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Unordered "equal": the unordered "less or equal" test is emitted in
+ * both directions (one result in a scratch GPR, the other in r0) and
+ * the two results are ANDed into r0.
+ */
+static void
+_uneqr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+         jit_bool_t is_double)
+{
+    jit_uint16_t other;
+
+    other = jit_get_reg(jit_class_gpr);
+    _unler_f(_jit, rn(other), r2, r1, is_double);
+    _unler_f(_jit, r0, r1, r2, is_double);
+    andr(r0, r0, rn(other));
+    jit_unget_reg(other);
+}
+
+/* Immediate form of uneqr_f. */
+static void
+_uneqi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    uneqr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of uneqr_d. */
+static void
+_uneqi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    uneqr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* "Less or greater": the unordered-equal result with bit 0 inverted. */
+static void
+_ltgtr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+         jit_bool_t is_double)
+{
+    _uneqr_f(_jit, r0, r1, r2, is_double);
+    xori(r0, r0, 1);
+}
+
+/* Immediate form of ltgtr_f. */
+static void
+_ltgti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    ltgtr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of ltgtr_d. */
+static void
+_ltgti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    ltgtr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_ordr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+ jit_bool_t is_double)
+{
+    jit_uint16_t reg = jit_get_reg(jit_class_gpr);
+
+    _eqr_f(_jit, rn(reg), r1, r1, is_double);
+    _eqr_f(_jit, r0, r2, r2, is_double);
+    andr(r0, r0, rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+/* Immediate form of ordr_f: i0 is loaded into a scratch FPR first. */
+static void
+_ordi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    ordr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of ordr_d. */
+static void
+_ordi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    ordr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * "Unordered": each operand is compared with itself for inequality (one
+ * result in a scratch GPR, the other in r0) and the two results are
+ * ORed into r0.
+ */
+static void
+_unordr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+          jit_bool_t is_double)
+{
+    jit_uint16_t other;
+
+    other = jit_get_reg(jit_class_gpr);
+    _ner_f(_jit, rn(other), r1, r1, is_double);
+    _ner_f(_jit, r0, r2, r2, is_double);
+    orr(r0, r0, rn(other));
+    jit_unget_reg(other);
+}
+
+/* Immediate form of unordr_f. */
+static void
+_unordi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    unordr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of unordr_d. */
+static void
+_unordi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    unordr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 + r2 in the precision selected by is_double.  FADD
+ * accumulates into its first operand, so when r0 already is r2 the
+ * other addend is added in place; otherwise r1 is copied to r0 first.
+ */
+static void
+_addr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+        jit_uint16_t r2, jit_bool_t is_double)
+{
+    set_fmode(_jit, is_double);
+
+    if (r0 == r2) {
+        FADD(r0, r1);
+        return;
+    }
+
+    if (is_double)
+        movr_d(r0, r1);
+    else
+        movr_f(r0, r1);
+    FADD(r0, r2);
+}
+
+/*
+ * r0 = r1 + i0, single precision.  When r0 and r1 differ the immediate
+ * is loaded straight into r0; when they are the same register it goes
+ * through a scratch FPR.
+ */
+static void
+_addi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    set_fmode(_jit, 0);
+
+    if (r0 != r1) {
+        movi_f(r0, i0);
+        FADD(r0, r1);
+        return;
+    }
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    FADD(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 + i0, double precision.  When r0 and r1 differ the immediate
+ * is loaded straight into r0; when they are the same register it goes
+ * through a scratch FPR.
+ */
+static void _addi_d(jit_state_t *_jit, jit_uint16_t r0,
+ jit_uint16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t reg;
+
+    set_fmode(_jit, 1);
+
+    if (r0 == r1) {
+        reg = jit_get_reg(jit_class_fpr);
+
+        movi_d(rn(reg), i0);
+        FADD(r0, rn(reg));
+
+        jit_unget_reg(reg);
+    } else {
+        movi_d(r0, i0);
+        FADD(r0, r1);
+    }
+}
+
+/*
+ * r0 = r1 - r2, single precision.
+ *
+ * Only one aliasing case needs special treatment: when r0 is r2 (and
+ * not r1) the minuend cannot be copied into r0 without destroying the
+ * subtrahend, so the result is formed as (-r0) + r1.  In every other
+ * case r1 is copied to r0 (movr_f emits nothing when they are the same
+ * register) and r2 is subtracted.
+ *
+ * r1 == r2 is deliberately not folded to a constant zero: x - x is NaN
+ * when x is NaN or an infinity, so the subtraction has to be executed.
+ */
+static void
+_subr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    set_fmode(_jit, 0);
+
+    if (r0 == r2 && r0 != r1) {
+        FNEG(r0);
+        FADD(r0, r1);
+    } else {
+        movr_f(r0, r1);
+        FSUB(r0, r2);
+    }
+}
+
+/*
+ * r0 = r1 - r2, double precision.  Same aliasing rules as _subr_f: the
+ * (-r0) + r1 form is used only when r0 is r2 but not r1, and r1 == r2
+ * is computed by the FPU rather than folded to zero so that NaN and
+ * infinite inputs yield NaN.
+ */
+static void
+_subr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    set_fmode(_jit, 1);
+
+    if (r0 == r2 && r0 != r1) {
+        FNEG(r0);
+        FADD(r0, r1);
+    } else {
+        movr_d(r0, r1);
+        FSUB(r0, r2);
+    }
+}
+
+/*
+ * r0 = r1 - i0, single precision.  In place (r0 == r1) the immediate is
+ * subtracted through a scratch FPR; otherwise -i0 is loaded into r0 and
+ * r1 is added to it.
+ */
+static void
+_subi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t reg;
+
+    set_fmode(_jit, 0);
+
+    if (r0 == r1) {
+        reg = jit_get_reg(jit_class_fpr);
+
+        movi_f(rn(reg), i0);
+        FSUB(r0, rn(reg));
+
+        jit_unget_reg(reg);
+    } else {
+        movi_f(r0, -i0);
+        FADD(r0, r1);
+    }
+}
+
+/* r0 = r1 - i0, double precision; same strategy as _subi_f. */
+static void
+_subi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t reg;
+
+    set_fmode(_jit, 1);
+
+    if (r0 == r1) {
+        reg = jit_get_reg(jit_class_fpr);
+
+        movi_d(rn(reg), i0);
+        FSUB(r0, rn(reg));
+
+        jit_unget_reg(reg);
+    } else {
+        movi_d(r0, -i0);
+        FADD(r0, r1);
+    }
+}
+
+/*
+ * r0 = i0 - r1, single precision.  In place (r0 == r1) the immediate is
+ * loaded into a scratch FPR and subr_f handles the aliasing; otherwise
+ * i0 is loaded into r0 and r1 is subtracted from it.
+ */
+static void
+_rsbi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t reg;
+
+    set_fmode(_jit, 0);
+
+    if (r0 == r1) {
+        reg = jit_get_reg(jit_class_fpr);
+
+        movi_f(rn(reg), i0);
+        subr_f(r0, rn(reg), r0);
+
+        jit_unget_reg(reg);
+    } else {
+        movi_f(r0, i0);
+        FSUB(r0, r1);
+    }
+}
+
+static void
+_rsbi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t reg;
+
+    set_fmode(_jit,
1);
+
+    if (r0 == r1) {
+        reg = jit_get_reg(jit_class_fpr);
+
+        movi_d(rn(reg), i0);
+        subr_d(r0, rn(reg), r0);
+
+        jit_unget_reg(reg);
+    } else {
+        movi_d(r0, i0);
+        FSUB(r0, r1);
+    }
+}
+
+/*
+ * r0 = r1 * r2, single precision.  FMUL accumulates into its first
+ * operand: if r0 already is r2 the other factor is multiplied in
+ * place, otherwise r1 is copied to r0 first.
+ */
+static void
+_mulr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    set_fmode(_jit, 0);
+
+    if (r0 != r2) {
+        movr_f(r0, r1);
+        FMUL(r0, r2);
+        return;
+    }
+
+    FMUL(r0, r1);
+}
+
+/*
+ * r0 = r1 * i0, single precision.  When r0 and r1 differ the immediate
+ * is loaded directly into r0; in place it goes through a scratch FPR.
+ */
+static void
+_muli_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp;
+
+    if (r0 != r1) {
+        movi_f(r0, i0);
+        mulr_f(r0, r0, r1);
+        return;
+    }
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    mulr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 * r2, double precision; same operand handling as _mulr_f. */
+static void
+_mulr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    set_fmode(_jit, 1);
+
+    if (r0 != r2) {
+        movr_d(r0, r1);
+        FMUL(r0, r2);
+        return;
+    }
+
+    FMUL(r0, r1);
+}
+
+/* r0 = r1 * i0, double precision; same strategy as _muli_f. */
+static void
+_muli_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0)
+{
+    jit_uint16_t tmp;
+
+    if (r0 != r1) {
+        movi_d(r0, i0);
+        mulr_d(r0, r0, r1);
+        return;
+    }
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    mulr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 / r2, single precision.  When the destination is also the
+ * divisor, the divisor is saved in a scratch FPR before r1 is copied
+ * over it.
+ */
+static void
+_divr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    jit_uint16_t divisor;
+
+    set_fmode(_jit, 0);
+
+    if (r0 != r2) {
+        movr_f(r0, r1);
+        FDIV(r0, r2);
+        return;
+    }
+
+    divisor = jit_get_reg(jit_class_fpr);
+    movr_f(rn(divisor), r2);
+    movr_f(r0, r1);
+    FDIV(r0, rn(divisor));
+    jit_unget_reg(divisor);
+}
+
+/* r0 = r1 / i0, single precision: the immediate always uses a scratch FPR. */
+static void
+_divi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+    jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+
+    movi_f(rn(tmp), i0);
+    divr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_divr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+    jit_uint16_t reg;
+
+    set_fmode(_jit, 1);
+
+    if (r0 == r2) {
+        reg = jit_get_reg(jit_class_fpr);
+
+
movr_d(rn(reg), r2); + movr_d(r0, r1); + FDIV(r0, rn(reg)); + + jit_unget_reg(reg); + } else { + movr_d(r0, r1); + FDIV(r0, r2); + } +} + +static void +_divi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + divr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void _absr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 0); + + movr_f(r0, r1); + FABS(r0); +} + +static void _absr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 1); + + movr_d(r0, r1); + FABS(r0); +} + +static void _sqrtr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 0); + + movr_f(r0, r1); + FSQRT(r0); +} + +static void _sqrtr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 1); + + movr_d(r0, r1); + FSQRT(r0); +} + +static void _negr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 0); + + movr_f(r0, r1); + FNEG(r0); +} + +static void _negr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 1); + + movr_d(r0, r1); + FNEG(r0); +} + +static void _extr_d_f(jit_state_t *_jit,jit_uint16_t r0, jit_uint16_t r1) +{ + if (SH_SINGLE_ONLY) { + movr_f(r0, r1); + } else { + set_fmode(_jit, 1); + FCNVDS(r1); + set_fmode(_jit, 0); + FSTS(r0); + } +} + +static void _extr_f_d(jit_state_t *_jit,jit_uint16_t r0, jit_uint16_t r1) +{ + if (SH_SINGLE_ONLY) { + movr_f(r0, r1); + } else { + set_fmode(_jit, 0); + FLDS(r1); + set_fmode(_jit, 1); + FCNVSD(r0); + } +} + +static void _ldr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (SH_SINGLE_ONLY) { + ldr_f(r0, r1); + } else { + movr(_R0, r1); + LDFS(r0 + 1, _R0); + LDF(r0, _R0); + } +} + +static void _ldi_f(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + movi(_R0, i0); + ldr_f(r0, _R0); +} + +static void _ldi_d(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + 
movi(_R0, i0); + ldr_d(r0, _R0); +} + +static void _ldxr_f(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_uint16_t r2) +{ + movr(_R0, r2); + LDXF(r0, r1); +} + +static void _ldxr_d(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_uint16_t r2) +{ + if (SH_SINGLE_ONLY) { + ldxr_f(r0, r1, r2); + } else { + addr(_R0, r1, r2); + ldr_d(r0, _R0); + } +} + +static void _ldxi_f(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_word_t i0) +{ + movi(_R0, i0); + ldxr_f(r0, r1, _R0); +} + +static void _ldxi_d(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_word_t i0) +{ + movi(_R0, i0); + ldxr_d(r0, r1, _R0); +} + +static void _str_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (SH_SINGLE_ONLY) { + str_f(r0, r1); + } else { + STF(r0, r1 + 1); + movi(_R0, 4); + STXF(r0, r1); + } +} + +static void _sti_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0) +{ + movi(_R0, i0); + STF(_R0, r0); +} + +static void _sti_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0) +{ + if (SH_SINGLE_ONLY) { + sti_f(i0, r0); + } else { + movi(_R0, i0 + 8); + STFS(_R0, r0); + STFS(_R0, r0 + 1); + } +} + +static void _stxr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2) +{ + movr(_R0, r0); + STXF(r1, r2); +} + +static void _stxr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2) +{ + if (SH_SINGLE_ONLY) { + stxr_f(r0, r1, r2); + } else { + movr(_R0, r0); + STXF(r1, r2 + 1); + addi(_R0, _R0, 4); + STXF(r1, r2); + } +} + +static void _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1) +{ + movi(_R0, i0); + stxr_f(_R0, r0, r1); +} + +static void _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1) +{ + movi(_R0, i0); + stxr_d(_R0, r0, r1); +} + +static jit_word_t _beqr_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t is_double, + jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, is_double); + + 
FCMPEQ(r0, r1); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _beqi_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_float32_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + jit_uint16_t reg; + + set_fmode(_jit, 0); + + reg = jit_get_reg(jit_class_fpr); + movi_f(rn(reg), i1); + + FCMPEQ(r0, rn(reg)); + jit_unget_reg(reg); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _beqi_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_float64_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + jit_uint16_t reg; + + set_fmode(_jit, 1); + + reg = jit_get_reg(jit_class_fpr); + movi_d(rn(reg), i1); + + FCMPEQ(r0, rn(reg)); + jit_unget_reg(reg); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, + jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, is_double); + + FCMPGT(r0, r1); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_blti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bltr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_blti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bltr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bgti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t 
r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bgtr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bgti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bgtr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t _bler_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, + jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, is_double); + + FCMPGT(r1, r0); + MOVT(_R0); + FCMPEQ(r0, r1); + ROTCL(_R0); + TSTI(3); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_blei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bler_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_blei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bler_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bgei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bger_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bgei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + 
movi_d(rn(reg), i1); + w = bger_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t _buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, jit_bool_t p) +{ + jit_word_t w; + + _uneqr_f(_jit, _R0, r0, r1, is_double); + TST(_R0, _R0); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, 0, p); + + return (w); +} + +static jit_word_t _bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, jit_bool_t p) +{ + jit_word_t w; + + _ltgtr_f(_jit, _R0, r0, r1, is_double); + TST(_R0, _R0); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, 0, p); + + return (w); +} + +static jit_word_t _bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, + jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + _ordr_f(_jit, _R0, r0, r1, is_double); + TST(_R0, _R0); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, !set, p); + + return (w); +} + +static jit_word_t +_bunlti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bunltr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunlti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bunltr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunlei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bunler_f_p(i0, r0, rn(reg), p); + + 
jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunlei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bunler_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bungti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bungtr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bungti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bungtr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bungei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bunger_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bungei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bunger_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_buneqi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = buneqr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_buneqi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = 
jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = buneqr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bltgti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bltgtr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bltgti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bltgtr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bordi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bordr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bordi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bordr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunordi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bunordr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunordi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bunordr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static void +_ldxai_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, 
jit_word_t i0) +{ + if (i0 == 4) + LDFS(r0, r1); + else + generic_ldxai_f(r0, r1, i0); +} + +static void +_ldxai_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_word_t i0) +{ + if (SH_SINGLE_ONLY) { + ldxai_f(r0, r1, i0); + } else if (i0 == 8) { + LDFS(r0 + 1, r1); + LDFS(r0, r1); + } else { + generic_ldxai_d(r0, r1, i0); + } +} + +static void +_stxbi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, jit_int16_t r1) +{ + if (i0 == -4) + STFS(r0, r1); + else + generic_stxbi_f(i0, r0, r1); +} + +static void +_stxbi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, jit_int16_t r1) +{ + if (SH_SINGLE_ONLY) { + stxbi_f(i0, r0, r1); + } else if (i0 == -8) { + STFS(r0, r1); + STFS(r0, r1 + 1); + } else { + generic_stxbi_d(i0, r0, r1); + } +} + +static void _movr_w_f(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1) +{ + LDS(r1); + FSTS(r0); +} + +static void _movr_f_w(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1) +{ + FLDS(r1); + STSUL(r0); +} + +static void _movi_w_f(jit_state_t *_jit, jit_int16_t r0, jit_word_t i0) +{ + movi(_R0, i0); + movr_w_f(r0, _R0); +} + +static void _movr_ww_d(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1, jit_int16_t r2) +{ + /* TODO: single-only */ + movr_w_f(r0 + 1, r1); + movr_w_f(r0, r2); +} + +static void _movr_d_ww(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1, jit_int16_t r2) +{ + /* TODO: single-only */ + movr_f_w(r0, r2 + 1); + movr_f_w(r1, r2); +} + +static void _movi_ww_d(jit_state_t *_jit, jit_int16_t r0, jit_word_t i0, jit_word_t i1) +{ + /* TODO: single-only */ + movi_w_f(r0, i1); + movi_w_f(r0 + 1, i0); +} + +static void +_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ /* Emit code that loads the next double variadic argument into FPR r0; GPR r1 holds the address of the jit_va_list_t. */ + jit_int32_t rg0, rg1; + jit_word_t ge_code; + + assert(_jitc->function->self.call & jit_call_varargs); + + rg0 = jit_get_reg(jit_class_gpr); + rg1 = jit_get_reg(jit_class_gpr); + + /* Load the begin/end fpr pointers: rg1 = efpr, rg0 = bfpr (_R0 keeps the offset of bfpr) */ + ldxi(rn(rg1), r1, offsetof(jit_va_list_t, efpr)); + movi(_R0, offsetof(jit_va_list_t, bfpr)); +
ldxr(rn(rg0), r1, _R0); + + /* Check that we did not reach the end fpr pointer. */ + CMPHS(rn(rg0), rn(rg1)); + + ge_code = _jit->pc.w; + BF(0); + + /* If we did, load the over (stack) pointer instead; _R0 now holds the offset of over. */ + movi(_R0, offsetof(jit_va_list_t, over)); + ldxr(rn(rg0), r1, _R0); + + patch_at(ge_code, _jit->pc.w); + + /* All good, we can now load the actual value */ + ldxai_d(r0, rn(rg0), sizeof(jit_float64_t)); + + /* Store rg0 back at offset _R0 (bfpr or over), i.e. into the field it was read from. NOTE(review): relies on ldxai_d leaving _R0 untouched -- confirm for the generic_ldxai_d and single-only paths. */ + stxr(_R0, r1, rn(rg0)); + + jit_unget_reg(rg0); + jit_unget_reg(rg1); +} + +#endif /* CODE */ diff --git a/lib/jit_sh-sz.c b/lib/jit_sh-sz.c new file mode 100644 index 0000000..0b02cbf --- /dev/null +++ b/lib/jit_sh-sz.c @@ -0,0 +1,598 @@ +#define JIT_INSTR_MAX 116 + 0, /* data */ + 0, /* live */ + 4, /* align */ + 0, /* save */ + 0, /* load */ + 4, /* skip */ + 0, /* #name */ + 0, /* #note */ + 0, /* label */ + 40, /* prolog */ + 0, /* ellipsis */ + 0, /* va_push */ + 0, /* allocai */ + 0, /* allocar */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ + 0, /* getarg_c */ + 0, /* getarg_uc */ + 0, /* getarg_s */ + 0, /* getarg_us */ + 0, /* getarg_i */ + 0, /* getarg_ui */ + 0, /* getarg_l */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ + 0, /* va_end */ + 4, /* addr */ + 10, /* addi */ + 6, /* addcr */ + 12, /* addci */ + 4, /* addxr */ + 10, /* addxi */ + 4, /* subr */ + 10, /* subi */ + 8, /* subcr */ + 14, /* subci */ + 6, /* subxr */ + 12, /* subxi */ + 10, /* rsbi */ + 4, /* mulr */ + 10, /* muli */ + 6, /* qmulr */ + 12, /* qmuli */ + 6, /* qmulr_u */ + 12, /* qmuli_u */ + 34, /* divr */ + 40, /* divi */ + 24, /* divr_u */ + 30, /* divi_u */ + 44, /* qdivr */ + 50, /* qdivi */
+ 34, /* qdivr_u */ + 40, /* qdivi_u */ + 44, /* remr */ + 50, /* remi */ + 34, /* remr_u */ + 40, /* remi_u */ + 4, /* andr */ + 10, /* andi */ + 4, /* orr */ + 10, /* ori */ + 4, /* xorr */ + 10, /* xori */ +# if defined(__SH3__) || defined(__SH4__) || defined(__SH4_NOFPU__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) + 6, /* lshr */ + 12, /* lshi */ + 6, /* rshr */ + 12, /* rshi */ + 6, /* rshr_u */ + 12, /* rshi_u */ +#else + 16, /* lshr */ + 22, /* lshi */ + 14, /* rshr */ + 20, /* rshi */ + 14, /* rshr_u */ + 20, /* rshi_u */ +#endif + 2, /* negr */ + 4, /* negi */ + 2, /* comr */ + 4, /* comi */ + 4, /* ltr */ + 4, /* lti */ + 4, /* ltr_u */ + 4, /* lti_u */ + 4, /* ler */ + 10, /* lei */ + 4, /* ler_u */ + 10, /* lei_u */ + 4, /* eqr */ + 10, /* eqi */ + 4, /* ger */ + 10, /* gei */ + 4, /* ger_u */ + 10, /* gei_u */ + 4, /* gtr */ + 10, /* gti */ + 4, /* gtr_u */ + 10, /* gti_u */ + 6, /* ner */ + 12, /* nei */ + 2, /* movr */ + 6, /* movi */ + 6, /* movnr */ + 6, /* movzr */ + 24, /* casr */ + 30, /* casi */ + 2, /* extr_c */ + 4, /* exti_c */ + 2, /* extr_uc */ + 4, /* exti_uc */ + 2, /* extr_s */ + 4, /* exti_s */ + 2, /* extr_us */ + 4, /* exti_us */ + 4, /* extr_i */ + 0, /* exti_i */ + 8, /* extr_ui */ + 0, /* exti_ui */ + 4, /* bswapr_us */ + 4, /* bswapi_us */ + 6, /* bswapr_ui */ + 8, /* bswapi_ui */ + 0, /* bswapr_ul */ + 0, /* bswapi_ul */ + 4, /* htonr_us */ + 4, /* htoni_us */ + 6, /* htonr_ui */ + 8, /* htoni_ui */ + 0, /* htonr_ul */ + 0, /* htoni_ul */ + 2, /* ldr_c */ + 12, /* ldi_c */ + 4, /* ldr_uc */ + 12, /* ldi_uc */ + 2, /* ldr_s */ + 12, /* ldi_s */ + 4, /* ldr_us */ + 12, /* ldi_us */ + 2, /* ldr_i */ + 12, /* ldi_i */ + 2, /* ldr_ui */ + 12, /* ldi_ui */ + 0, /* ldr_l */ + 0, /* ldi_l */ + 4, /* ldxr_c */ + 16, /* ldxi_c */ + 6, /* ldxr_uc */ + 16, /* ldxi_uc */ + 4, /* ldxr_s */ + 16, /* ldxi_s */ + 6, /* ldxr_us */ + 16, /* ldxi_us */ + 4, /* ldxr_i */ + 16, /* ldxi_i */ + 4, /* ldxr_ui */ + 16, /* ldxi_ui */ + 0, 
/* ldxr_l */ + 0, /* ldxi_l */ + 2, /* str_c */ + 12, /* sti_c */ + 2, /* str_s */ + 12, /* sti_s */ + 2, /* str_i */ + 12, /* sti_i */ + 0, /* str_l */ + 0, /* sti_l */ + 4, /* stxr_c */ + 16, /* stxi_c */ + 4, /* stxr_s */ + 16, /* stxi_s */ + 4, /* stxr_i */ + 16, /* stxi_i */ + 0, /* stxr_l */ + 0, /* stxi_l */ + 6, /* bltr */ + 8, /* blti */ + 6, /* bltr_u */ + 8, /* blti_u */ + 6, /* bler */ + 8, /* blei */ + 6, /* bler_u */ + 8, /* blei_u */ + 6, /* beqr */ + 28, /* beqi */ + 6, /* bger */ + 8, /* bgei */ + 6, /* bger_u */ + 8, /* bgei_u */ + 6, /* bgtr */ + 8, /* bgti */ + 6, /* bgtr_u */ + 8, /* bgti_u */ + 6, /* bner */ + 20, /* bnei */ + 6, /* bmsr */ + 12, /* bmsi */ + 6, /* bmcr */ + 12, /* bmci */ + 8, /* boaddr */ + 36, /* boaddi */ + 8, /* boaddr_u */ + 20, /* boaddi_u */ + 8, /* bxaddr */ + 36, /* bxaddi */ + 8, /* bxaddr_u */ + 20, /* bxaddi_u */ + 10, /* bosubr */ + 36, /* bosubi */ + 8, /* bosubr_u */ + 20, /* bosubi_u */ + 10, /* bxsubr */ + 36, /* bxsubi */ + 8, /* bxsubr_u */ + 20, /* bxsubi_u */ + 4, /* jmpr */ + 10, /* jmpi */ + 4, /* callr */ + 10, /* calli */ + 0, /* prepare */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ + 0, /* finishr */ + 0, /* finishi */ + 0, /* ret */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ + 0, /* retval_c */ + 0, /* retval_uc */ + 0, /* retval_s */ + 0, /* retval_us */ + 0, /* retval_i */ + 0, /* retval_ui */ + 0, /* retval_l */ + 22, /* epilog */ + 0, /* arg_f */ + 0, /* getarg_f */ + 0, /* putargr_f */ + 0, /* putargi_f */ + 4, /* 
addr_f */ + 12, /* addi_f */ + 4, /* subr_f */ + 12, /* subi_f */ + 12, /* rsbi_f */ + 4, /* mulr_f */ + 12, /* muli_f */ + 4, /* divr_f */ + 12, /* divi_f */ + 4, /* negr_f */ + 0, /* negi_f */ + 4, /* absr_f */ + 0, /* absi_f */ + 4, /* sqrtr_f */ + 0, /* sqrti_f */ + 4, /* ltr_f */ + 12, /* lti_f */ + 4, /* ler_f */ + 12, /* lei_f */ + 4, /* eqr_f */ + 12, /* eqi_f */ + 4, /* ger_f */ + 12, /* gei_f */ + 4, /* gtr_f */ + 12, /* gti_f */ + 8, /* ner_f */ + 16, /* nei_f */ + 28, /* unltr_f */ + 36, /* unlti_f */ + 28, /* unler_f */ + 36, /* unlei_f */ + 28, /* uneqr_f */ + 36, /* uneqi_f */ + 28, /* unger_f */ + 36, /* ungei_f */ + 28, /* ungtr_f */ + 36, /* ungti_f */ + 40, /* ltgtr_f */ + 48, /* ltgti_f */ + 28, /* ordr_f */ + 36, /* ordi_f */ + 20, /* unordr_f */ + 28, /* unordi_f */ + 4, /* truncr_f_i */ + 4, /* truncr_f_l */ + 4, /* extr_f */ + 4, /* extr_d_f */ + 4, /* movr_f */ + 8, /* movi_f */ + 4, /* ldr_f */ + 12, /* ldi_f */ + 8, /* ldxr_f */ + 16, /* ldxi_f */ + 4, /* str_f */ + 12, /* sti_f */ + 8, /* stxr_f */ + 16, /* stxi_f */ + 8, /* bltr_f */ + 16, /* blti_f */ + 8, /* bler_f */ + 16, /* blei_f */ + 8, /* beqr_f */ + 16, /* beqi_f */ + 8, /* bger_f */ + 16, /* bgei_f */ + 8, /* bgtr_f */ + 16, /* bgti_f */ + 8, /* bner_f */ + 16, /* bnei_f */ + 32, /* bunltr_f */ + 40, /* bunlti_f */ + 32, /* bunler_f */ + 40, /* bunlei_f */ + 32, /* buneqr_f */ + 40, /* buneqi_f */ + 32, /* bunger_f */ + 40, /* bungei_f */ + 32, /* bungtr_f */ + 40, /* bungti_f */ + 44, /* bltgtr_f */ + 52, /* bltgti_f */ + 32, /* bordr_f */ + 40, /* bordi_f */ + 24, /* bunordr_f */ + 32, /* bunordi_f */ + 0, /* pushargr_f */ + 0, /* pushargi_f */ + 0, /* retr_f */ + 0, /* reti_f */ + 0, /* retval_f */ + 0, /* arg_d */ + 0, /* getarg_d */ + 0, /* putargr_d */ + 0, /* putargi_d */ + 4, /* addr_d */ + 24, /* addi_d */ + 4, /* subr_d */ + 24, /* subi_d */ + 24, /* rsbi_d */ + 4, /* mulr_d */ + 24, /* muli_d */ + 4, /* divr_d */ + 24, /* divi_d */ + 4, /* negr_d */ + 0, /* negi_d 
*/ + 4, /* absr_d */ + 0, /* absi_d */ + 4, /* sqrtr_d */ + 0, /* sqrti_d */ + 4, /* ltr_d */ + 24, /* lti_d */ + 4, /* ler_d */ + 24, /* lei_d */ + 4, /* eqr_d */ + 24, /* eqi_d */ + 4, /* ger_d */ + 24, /* gei_d */ + 4, /* gtr_d */ + 24, /* gti_d */ + 8, /* ner_d */ + 28, /* nei_d */ + 28, /* unltr_d */ + 48, /* unlti_d */ + 28, /* unler_d */ + 48, /* unlei_d */ + 28, /* uneqr_d */ + 48, /* uneqi_d */ + 28, /* unger_d */ + 48, /* ungei_d */ + 28, /* ungtr_d */ + 48, /* ungti_d */ + 40, /* ltgtr_d */ + 60, /* ltgti_d */ + 28, /* ordr_d */ + 48, /* ordi_d */ + 20, /* unordr_d */ + 40, /* unordi_d */ + 4, /* truncr_d_i */ + 4, /* truncr_d_l */ + 4, /* extr_d */ + 4, /* extr_f_d */ + 4, /* movr_d */ + 20, /* movi_d */ + 4, /* ldr_d */ + 12, /* ldi_d */ + 8, /* ldxr_d */ + 16, /* ldxi_d */ + 4, /* str_d */ + 12, /* sti_d */ + 8, /* stxr_d */ + 16, /* stxi_d */ + 8, /* bltr_d */ + 28, /* blti_d */ + 8, /* bler_d */ + 28, /* blei_d */ + 8, /* beqr_d */ + 28, /* beqi_d */ + 8, /* bger_d */ + 28, /* bgei_d */ + 8, /* bgtr_d */ + 28, /* bgti_d */ + 8, /* bner_d */ + 28, /* bnei_d */ + 32, /* bunltr_d */ + 52, /* bunlti_d */ + 32, /* bunler_d */ + 52, /* bunlei_d */ + 32, /* buneqr_d */ + 52, /* buneqi_d */ + 32, /* bunger_d */ + 52, /* bungei_d */ + 32, /* bungtr_d */ + 52, /* bungti_d */ + 44, /* bltgtr_d */ + 64, /* bltgti_d */ + 32, /* bordr_d */ + 52, /* bordi_d */ + 24, /* bunordr_d */ + 44, /* bunordi_d */ + 0, /* pushargr_d */ + 0, /* pushargi_d */ + 0, /* retr_d */ + 0, /* reti_d */ + 0, /* retval_d */ + 4, /* movr_w_f */ + 8, /* movi_w_f */ + 0, /* movr_ww_d */ + 16, /* movi_ww_d */ + 4, /* movr_w_d */ + 0, /* movi_w_d */ + 0, /* movr_f_w */ + 4, /* movi_f_w */ + 0, /* movr_d_ww */ + 0, /* movi_d_ww */ + 4, /* movr_d_w */ + 16, /* movi_d_w */ + 10, /* clor */ + 6, /* cloi */ + 12, /* clzr */ + 6, /* clzi */ + 10, /* ctor */ + 6, /* ctoi */ + 12, /* ctzr */ + 6, /* ctzi */ + 14, /* rbitr */ + 6, /* rbiti */ + 14, /* popcntr */ + 6, /* popcnti */ + 14, /* lrotr */ + 
14, /* lroti */ + 14, /* rrotr */ + 14, /* rroti */ + 8, /* extr */ + 6, /* exti */ + 4, /* extr_u */ + 6, /* exti_u */ + 4, /* depr */ + 10, /* depi */ + 18, /* qlshr */ + 8, /* qlshi */ + 18, /* qlshr_u */ + 8, /* qlshi_u */ + 18, /* qrshr */ + 8, /* qrshi */ + 18, /* qrshr_u */ + 8, /* qrshi_u */ + 16, /* unldr */ + 20, /* unldi */ + 16, /* unldr_u */ + 20, /* unldi_u */ + 44, /* unstr */ + 28, /* unsti */ + 32, /* unldr_x */ + 40, /* unldi_x */ + 28, /* unstr_x */ + 40, /* unsti_x */ + 4, /* fmar_f */ + 0, /* fmai_f */ + 4, /* fmsr_f */ + 0, /* fmsi_f */ + 4, /* fmar_d */ + 0, /* fmai_d */ + 4, /* fmsr_d */ + 0, /* fmsi_d */ + 4, /* fnmar_f */ + 0, /* fnmai_f */ + 4, /* fnmsr_f */ + 0, /* fnmsi_f */ + 4, /* fnmar_d */ + 0, /* fnmai_d */ + 4, /* fnmsr_d */ + 0, /* fnmsi_d */ + 8, /* hmulr */ + 16, /* hmuli */ + 8, /* hmulr_u */ + 16, /* hmuli_u */ + 8, /* ldxbr_c */ + 18, /* ldxbi_c */ + 12, /* ldxar_c */ + 18, /* ldxai_c */ + 4, /* ldxbr_uc */ + 18, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 18, /* ldxai_uc */ + 4, /* ldxbr_s */ + 18, /* ldxbi_s */ + 8, /* ldxar_s */ + 18, /* ldxai_s */ + 4, /* ldxbr_us */ + 18, /* ldxbi_us */ + 8, /* ldxar_us */ + 18, /* ldxai_us */ + 4, /* ldxbr_i */ + 18, /* ldxbi_i */ + 8, /* ldxar_i */ + 18, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 4, /* ldxbr_f */ + 18, /* ldxbi_f */ + 8, /* ldxar_f */ + 18, /* ldxai_f */ + 4, /* ldxbr_d */ + 18, /* ldxbi_d */ + 8, /* ldxar_d */ + 18, /* ldxai_d */ + 4, /* stxbr_c */ + 18, /* stxbi_c */ + 8, /* stxar_c */ + 18, /* stxai_c */ + 4, /* stxbr_s */ + 18, /* stxbi_s */ + 8, /* stxar_s */ + 18, /* stxai_s */ + 4, /* stxbr_i */ + 18, /* stxbi_i */ + 8, /* stxar_i */ + 18, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 4, /* stxbr_f */ + 18, /* stxbi_f */ + 8, /* stxar_f */ + 18, /* stxai_f */ + 4, /* stxbr_d */ + 18, /* stxbi_d 
*/ + 8, /* stxar_d */ + 18, /* stxai_d */ diff --git a/lib/jit_sh.c b/lib/jit_sh.c new file mode 100644 index 0000000..9806f14 --- /dev/null +++ b/lib/jit_sh.c @@ -0,0 +1,2215 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paul Cercueil + */ + +# define NUM_WORD_ARGS 4 +# define NUM_FLOAT_ARGS 8 +# define STACK_SLOT 4 +# define STACK_SHIFT 2 + +#define jit_arg_reg_p(i) ((i) >= 0 && (i) < NUM_WORD_ARGS) +#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < NUM_FLOAT_ARGS) + +#define fpr_args_inverted() (__BYTE_ORDER == __LITTLE_ENDIAN && !SH_SINGLE_ONLY) + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define C_DISP 0 +# define S_DISP 0 +# define I_DISP 0 +#else +# define C_DISP STACK_SLOT - sizeof(jit_int8_t) +# define S_DISP STACK_SLOT - sizeof(jit_int16_t) +# define I_DISP STACK_SLOT - sizeof(jit_int32_t) +#endif + +#define jit_make_arg(node,code) _jit_make_arg(_jit,node,code) +static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t); +#define jit_make_arg_f(node) _jit_make_arg_f(_jit,node) +static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*); +#define jit_make_arg_d(node) _jit_make_arg_d(_jit,node) +static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*); +#define load_const(uniq,r0,i0) _load_const(_jit,uniq,r0,i0) +static void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t); +#define load_const_f(uniq,r0,i0) _load_const_f(_jit,uniq,r0,i0) +static void 
_load_const_f(jit_state_t*,jit_bool_t,jit_int32_t,jit_float32_t); +#define flush_consts(force) _flush_consts(_jit,force) +static void _flush_consts(jit_state_t*,jit_bool_t); +#define invalidate_consts() _invalidate_consts(_jit) +static void _invalidate_consts(jit_state_t*); +#define patch(instr, node) _patch(_jit, instr, node) +static void _patch(jit_state_t*,jit_word_t,jit_node_t*); + +#define PROTO 1 +# include "jit_rewind.c" +# include "jit_sh-cpu.c" +# include "jit_sh-fpu.c" +# include "jit_fallback.c" +#undef PROTO + +jit_register_t _rvs[] = { + { 0x0, "r0" }, + { rc(gpr) | 0x1, "r1" }, + { rc(gpr) | 0x2, "r2" }, + { rc(gpr) | 0x3, "r3" }, + { rc(arg) | rc(gpr) | 0x4, "r4" }, + { rc(arg) | rc(gpr) | 0x5, "r5" }, + { rc(arg) | rc(gpr) | 0x6, "r6" }, + { rc(arg) | rc(gpr) | 0x7, "r7" }, + { rc(sav) | rc(gpr) | 0x8, "r8" }, + { rc(sav) | rc(gpr) | 0x9, "r9" }, + { rc(sav) | rc(gpr) | 0xa, "r10" }, + { rc(sav) | rc(gpr) | 0xb, "r11" }, + { rc(sav) | rc(gpr) | 0xc, "r12" }, + { rc(sav) | rc(gpr) | 0xd, "r13" }, + { rc(sav) | 0xe, "r14" }, + { rc(sav) | 0xf, "r15" }, + { 0x10, "gbr" }, + + /* Only use half of the floating-point registers. + * This makes it much easier to switch between + * float and double processing. 
*/ + { rc(fpr) | 0x0, "$f0" }, + { 0x1, "$f1" }, + { rc(fpr) | 0x2, "$f2" }, + { 0x3, "$f3" }, + { rc(fpr) | 0x4, "$f4" }, + { 0x5, "$f5" }, + { rc(fpr) | 0x6, "$f6" }, + { 0x7, "$f7" }, + { rc(fpr) | 0x8, "$f8" }, + { 0x9, "$f9" }, + { rc(fpr) | 0xa, "$f10" }, + { 0xb, "$f11" }, + { rc(fpr) | 0xc, "$f12" }, + { 0xd, "$f13" }, + { rc(fpr) | 0xe, "$f14" }, + { 0xf, "$f15" }, + + { _XF0, "$xf0" }, + { _XF1, "$xf1" }, + { _XF2, "$xf2" }, + { _XF3, "$xf3" }, + { _XF4, "$xf4" }, + { _XF5, "$xf5" }, + { _XF6, "$xf6" }, + { _XF7, "$xf7" }, + { _XF8, "$xf8" }, + { _XF9, "$xf9" }, + { _XF10, "$xf10" }, + { _XF11, "$xf11" }, + { rc(sav) | _XF12, "$xf12" }, + { rc(sav) | _XF13, "$xf13" }, + { rc(sav) | _XF14, "$xf14" }, + { rc(sav) | _XF15, "$xf15" }, +}; + +typedef struct jit_va_list { + jit_pointer_t bgpr; + jit_pointer_t egpr; + jit_pointer_t bfpr; + jit_pointer_t efpr; + jit_pointer_t over; +} jit_va_list_t; + +static jit_bool_t jit_uses_fpu(jit_code_t code) +{ + switch (code) { + case jit_code_retr_f: + case jit_code_retr_d: + case jit_code_pushargr_f: + case jit_code_pushargr_d: + case jit_code_reti_f: + case jit_code_pushargi_f: + case jit_code_reti_d: + case jit_code_pushargi_d: + case jit_code_arg_f: + case jit_code_arg_d: + case jit_code_retval_f: + case jit_code_retval_d: + case jit_code_getarg_f: + case jit_code_getarg_d: + case jit_code_putargr_f: + case jit_code_putargr_d: + case jit_code_putargi_f: + case jit_code_putargi_d: + case jit_code_ldi_f: + case jit_code_ldi_d: + case jit_code_movi_w_f: + case jit_code_movi_w_d: + case jit_code_movi_ww_d: + case jit_code_movi_f: + case jit_code_movi_f_w: + case jit_code_negi_f: + case jit_code_absi_f: + case jit_code_sqrti_f: + case jit_code_movi_d: + case jit_code_movi_d_w: + case jit_code_negi_d: + case jit_code_absi_d: + case jit_code_sqrti_d: + case jit_code_truncr_f_i: + case jit_code_truncr_f_l: + case jit_code_truncr_d_i: + case jit_code_truncr_d_l: + case jit_code_negr_f: + case jit_code_absr_f: + case 
jit_code_sqrtr_f: + case jit_code_movr_f: + case jit_code_extr_f: + case jit_code_extr_d_f: + case jit_code_ldr_f: + case jit_code_negr_d: + case jit_code_absr_d: + case jit_code_sqrtr_d: + case jit_code_movr_d: + case jit_code_extr_d: + case jit_code_extr_f_d: + case jit_code_ldr_d: + case jit_code_movr_w_f: + case jit_code_movr_f_w: + case jit_code_movr_w_d: + case jit_code_movr_d_w: + case jit_code_va_arg_d: + case jit_code_ldxi_f: + case jit_code_ldxi_d: + case jit_code_addi_f: + case jit_code_subi_f: + case jit_code_rsbi_f: + case jit_code_muli_f: + case jit_code_divi_f: + case jit_code_lti_f: + case jit_code_lei_f: + case jit_code_eqi_f: + case jit_code_gei_f: + case jit_code_gti_f: + case jit_code_nei_f: + case jit_code_unlti_f: + case jit_code_unlei_f: + case jit_code_uneqi_f: + case jit_code_ungei_f: + case jit_code_ungti_f: + case jit_code_ltgti_f: + case jit_code_ordi_f: + case jit_code_unordi_f: + case jit_code_addi_d: + case jit_code_subi_d: + case jit_code_rsbi_d: + case jit_code_muli_d: + case jit_code_divi_d: + case jit_code_lti_d: + case jit_code_lei_d: + case jit_code_eqi_d: + case jit_code_gei_d: + case jit_code_gti_d: + case jit_code_nei_d: + case jit_code_unlti_d: + case jit_code_unlei_d: + case jit_code_uneqi_d: + case jit_code_ungei_d: + case jit_code_ungti_d: + case jit_code_ltgti_d: + case jit_code_ordi_d: + case jit_code_unordi_d: + case jit_code_addr_f: + case jit_code_subr_f: + case jit_code_mulr_f: + case jit_code_divr_f: + case jit_code_ltr_f: + case jit_code_ler_f: + case jit_code_eqr_f: + case jit_code_ger_f: + case jit_code_gtr_f: + case jit_code_ner_f: + case jit_code_unltr_f: + case jit_code_unler_f: + case jit_code_uneqr_f: + case jit_code_unger_f: + case jit_code_ungtr_f: + case jit_code_ltgtr_f: + case jit_code_ordr_f: + case jit_code_unordr_f: + case jit_code_ldxr_f: + case jit_code_addr_d: + case jit_code_subr_d: + case jit_code_mulr_d: + case jit_code_divr_d: + case jit_code_ltr_d: + case jit_code_ler_d: + case 
jit_code_eqr_d: + case jit_code_ger_d: + case jit_code_gtr_d: + case jit_code_ner_d: + case jit_code_unltr_d: + case jit_code_unler_d: + case jit_code_uneqr_d: + case jit_code_unger_d: + case jit_code_ungtr_d: + case jit_code_ltgtr_d: + case jit_code_ordr_d: + case jit_code_unordr_d: + case jit_code_ldxr_d: + case jit_code_movr_ww_d: + case jit_code_sti_f: + case jit_code_sti_d: + case jit_code_blti_f: + case jit_code_blei_f: + case jit_code_beqi_f: + case jit_code_bgei_f: + case jit_code_bgti_f: + case jit_code_bnei_f: + case jit_code_bunlti_f: + case jit_code_bunlei_f: + case jit_code_buneqi_f: + case jit_code_bungei_f: + case jit_code_bungti_f: + case jit_code_bltgti_f: + case jit_code_bordi_f: + case jit_code_bunordi_f: + case jit_code_blti_d: + case jit_code_blei_d: + case jit_code_beqi_d: + case jit_code_bgei_d: + case jit_code_bgti_d: + case jit_code_bnei_d: + case jit_code_bunlti_d: + case jit_code_bunlei_d: + case jit_code_buneqi_d: + case jit_code_bungei_d: + case jit_code_bungti_d: + case jit_code_bltgti_d: + case jit_code_bordi_d: + case jit_code_bunordi_d: + case jit_code_str_f: + case jit_code_str_d: + case jit_code_stxi_f: + case jit_code_stxi_d: + case jit_code_bltr_f: + case jit_code_bler_f: + case jit_code_beqr_f: + case jit_code_bger_f: + case jit_code_bgtr_f: + case jit_code_bner_f: + case jit_code_bunltr_f: + case jit_code_bunler_f: + case jit_code_buneqr_f: + case jit_code_bunger_f: + case jit_code_bungtr_f: + case jit_code_bltgtr_f: + case jit_code_bordr_f: + case jit_code_bunordr_f: + case jit_code_bltr_d: + case jit_code_bler_d: + case jit_code_beqr_d: + case jit_code_bger_d: + case jit_code_bgtr_d: + case jit_code_bner_d: + case jit_code_bunltr_d: + case jit_code_bunler_d: + case jit_code_buneqr_d: + case jit_code_bunger_d: + case jit_code_bungtr_d: + case jit_code_bltgtr_d: + case jit_code_bordr_d: + case jit_code_bunordr_d: + case jit_code_stxr_f: + case jit_code_stxr_d: + case jit_code_fmar_f: + case jit_code_fmar_d: + case 
jit_code_fmsr_f: + case jit_code_fmsr_d: + case jit_code_fnmar_f: + case jit_code_fnmar_d: + case jit_code_fnmsr_f: + case jit_code_fnmsr_d: + case jit_code_fmai_f: + case jit_code_fmsi_f: + case jit_code_fnmai_f: + case jit_code_fnmsi_f: + case jit_code_fmai_d: + case jit_code_fmsi_d: + case jit_code_fnmai_d: + case jit_code_fnmsi_d: + case jit_code_ldxbi_f: + case jit_code_ldxai_f: + case jit_code_ldxbi_d: + case jit_code_ldxai_d: + case jit_code_ldxbr_f: + case jit_code_ldxar_f: + case jit_code_ldxbr_d: + case jit_code_ldxar_d: + case jit_code_stxbi_f: + case jit_code_stxai_f: + case jit_code_stxbi_d: + case jit_code_stxai_d: + case jit_code_stxbr_f: + case jit_code_stxar_f: + case jit_code_stxbr_d: + case jit_code_stxar_d: + return 1; + default: + return 0; + } +} + +void +jit_get_cpu(void) +{ +} + +void +_jit_init(jit_state_t *_jit) +{ + _jitc->reglen = jit_size(_rvs) - 1; +} + +void +_jit_prolog(jit_state_t *_jit) +{ + jit_int32_t offset; + + if (_jitc->function) + jit_epilog(); + + assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0); + + jit_regset_set_ui(&_jitc->regsav, 0); + offset = _jitc->functions.offset; + + if (offset >= _jitc->functions.length) { + jit_realloc((jit_pointer_t *)&_jitc->functions.ptr, + _jitc->functions.length * sizeof(jit_function_t), + (_jitc->functions.length + 16) * sizeof(jit_function_t)); + _jitc->functions.length += 16; + } + + _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; + _jitc->function->self.size = stack_framesize; + _jitc->function->self.argi = _jitc->function->self.argf = + _jitc->function->self.aoff = _jitc->function->self.alen = 0; + _jitc->function->self.call = jit_call_default; + + jit_alloc((jit_pointer_t *)&_jitc->function->regoff, + _jitc->reglen * sizeof(jit_int32_t)); + + /* _no_link here does not mean the jit_link() call can be removed + * by rewriting as: + * _jitc->function->prolog = jit_new_node(jit_code_prolog); + */ + _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog); + 
jit_link(_jitc->function->prolog); + + _jitc->function->prolog->w.w = offset; + _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog); + /* u: label value + * v: offset in blocks vector + * w: offset in functions vector + */ + _jitc->function->epilog->w.w = offset; + + jit_regset_new(&_jitc->function->regset); +} + +jit_int32_t +_jit_allocai(jit_state_t *_jit, jit_int32_t length) +{ + assert(_jitc->function); + switch (length) { + case 0: case 1: break; + case 2: _jitc->function->self.aoff &= -2; break; + case 3: case 4: _jitc->function->self.aoff &= -4; break; + default: _jitc->function->self.aoff &= -8; break; + } + _jitc->function->self.aoff -= length; + if (!_jitc->realize) { + jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length); + jit_dec_synth(); + } + return (_jitc->function->self.aoff); +} + +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + jit_inc_synth_ww(allocar, u, v); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -8); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); + jit_dec_synth(); +} + +void +_jit_ret(jit_state_t *_jit) +{ + jit_node_t *instr; + assert(_jitc->function); + jit_inc_synth(ret); + /* jump to epilog */ + instr = jit_jmpi(); + jit_patch_at(instr, _jitc->function->epilog); + jit_dec_synth(); +} + +void +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) +{ + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); + jit_live(JIT_RET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) +{ + jit_code_inc_synth_w(code, u); + jit_movi(JIT_RET, u); + jit_ret(); + jit_dec_synth(); +} + +void 
+_jit_epilog(jit_state_t *_jit) +{ + assert(_jitc->function); + assert(_jitc->function->epilog->next == NULL); + jit_link(_jitc->function->epilog); + _jitc->function = NULL; +} + +void +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) +{ + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movr(JIT_RA0 + v->u.w, u); + else + jit_stxi(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) +{ + jit_int32_t regno; + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movi(JIT_RA0 + v->u.w, u); + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +jit_bool_t +_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) +{ + jit_int32_t spec; + + spec = jit_class(_rvs[regno].spec); + if (spec & jit_class_arg) { + if (spec & jit_class_gpr) { + regno = JIT_RA0 + regno; + if (regno >= 0 && regno < node->v.w) + return (1); + } + else if (spec & jit_class_fpr) { + regno = JIT_FA0 + regno; + if (regno >= 0 && regno < node->w.w) + return (1); + } + } + + return (0); +} + +jit_pointer_t +_emit_code(jit_state_t *_jit) +{ + jit_node_t *node; + jit_node_t *temp; + jit_word_t word; + jit_word_t value; + jit_int32_t offset; + struct { + jit_node_t *node; + jit_uint8_t *data; + jit_word_t word; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif +#if DISASSEMBLER + jit_int32_t info_offset; +#endif + jit_int32_t const_offset; + jit_int32_t patch_offset; + } undo; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif + + _jitc->function = NULL; + _jitc->no_flag = 0; + _jitc->mode_d = SH_DEFAULT_FPU_MODE; + _jitc->uses_fpu = 0; + + jit_reglive_setup(); + + _jitc->consts.data = NULL; + _jitc->consts.offset = _jitc->consts.length = 0; + + undo.word = 0; + undo.node = 
NULL; + undo.data = NULL; +#if DISASSEMBLER + undo.info_offset = +#endif + undo.const_offset = undo.patch_offset = 0; +#define case_rr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.w)); \ + break +#define case_rw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), node->v.w); \ + break +#define case_wr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w)); \ + break +#define case_rrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; +#define case_rrf(name) \ + case jit_code_##name##i_f: \ + name##i_f(rn(node->u.w), rn(node->v.w), node->w.f); \ + break +#define 
case_rrd(name) \ + case jit_code_##name##i_d: \ + name##i_d(rn(node->u.w), rn(node->v.w), node->w.d); \ + break +#define case_wrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_brr(name, type) \ + case jit_code_##name##r##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##r##type(temp->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + else { \ + word = _jit->code.length \ + - (_jit->pc.uc - _jit->code.ptr); \ + if (word < 4094) { \ + word = name##r##type(0, rn(node->v.w), \ + rn(node->w.w)); \ + } else { \ + word = name##r##type##_p(_jit->pc.w, \ + rn(node->v.w), \ + rn(node->w.w), 1); \ + } \ + patch(word, node); \ + } \ + break +#define case_brw(name, type) \ + case jit_code_##name##i##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i##type(temp->u.w, \ + rn(node->v.w), node->w.w); \ + else { \ + word = _jit->code.length \ + - (_jit->pc.uc - _jit->code.ptr); \ + if (word < 4094) { \ + word = name##i##type(0, rn(node->v.w), \ + node->w.w); \ + } else { \ + word = name##i##type##_p(_jit->pc.w, \ + rn(node->v.w), \ + node->w.w, 1); \ + } \ + patch(word, node); \ + } \ + break; +#define case_brf(name) \ + case jit_code_##name##i_f: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_f(temp->u.w, rn(node->v.w), node->w.f); \ + else { \ + word = name##i_f_p(_jit->pc.w, rn(node->v.w), \ + node->w.f, 1); \ + patch(word, node); \ + } \ + break +#define case_brd(name) \ + case jit_code_##name##i_d: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_d(temp->u.w, rn(node->v.w), node->w.d); \ + else { 
\ + word = name##i_d_p(_jit->pc.w, rn(node->v.w), \ + node->w.d, 1); \ + patch(word, node); \ + } \ + break +#if DEVEL_DISASSEMBLER + prevw = _jit->pc.w; +#endif + if (SH_HAS_FPU) { + for (node = _jitc->head; node && !_jitc->uses_fpu; node = node->next) + _jitc->uses_fpu = jit_uses_fpu(node->code); + } + + for (node = _jitc->head; node; node = node->next) { + if (_jit->pc.uc >= _jitc->code.end) + return (NULL); + +#if DEVEL_DISASSEMBLER + node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw; + prevw = _jit->pc.w; +#endif + value = jit_classify(node->code); + jit_regarg_set(node, value); + switch (node->code) { + case jit_code_align: + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; + case jit_code_note: case jit_code_name: + node->u.w = _jit->pc.w; + break; + case jit_code_label: + /* remember label is defined */ + node->flag |= jit_flag_patch; + /* Reset FPU mode */ + set_fmode_no_r0(_jit, SH_DEFAULT_FPU_MODE); + node->u.w = _jit->pc.w; + break; + case_rrr(add,); + case_rrw(add,); + case_rrr(addc,); + case_rrw(addc,); + case_rrr(addx,); + case_rrw(addx,); + case_rrr(sub,); + case_rrw(sub,); + case_rrr(subc,); + case_rrw(subc,); + case_rrr(subx,); + case_rrw(subx,); + case_rrw(rsb,); + case_rrr(mul,); + case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); + case_rrr(div,); + case_rrw(div,); + case_rrr(div, _u); + case_rrw(div, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + case_rrrw(qdiv, _u); + case_rrr(rem,); + case_rrw(rem,); + case_rrr(rem, _u); + case_rrw(rem, _u); + case_rrr(lsh,); + case_rrw(lsh,); + case_rrrr(qlsh,); + case_rrrw(qlsh,); + case_rrrr(qlsh, _u); + case_rrrw(qlsh, _u); + case_rrr(rsh,); + case_rrw(rsh,); + case_rrr(rsh, 
_u); + case_rrw(rsh, _u); + case_rrrr(qrsh,); + case_rrrw(qrsh,); + case_rrrr(qrsh, _u); + case_rrrw(qrsh, _u); + case_rr(neg,); + case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); + case_rr(rbit,); + case_rr(popcnt,); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); + case_rrr(and,); + case_rrw(and,); + case_rrr(or,); + case_rrw(or,); + case_rrr(xor,); + case_rrw(xor,); + case_rr(trunc, _f_i); + case_rr(trunc, _d_i); + case_rr(ld, _c); + case_rw(ld, _c); + case_rr(ld, _uc); + case_rw(ld, _uc); + case_rr(ld, _s); + case_rw(ld, _s); + case_rr(ld, _us); + case_rw(ld, _us); + case_rr(ld, _i); + case_rw(ld, _i); + case_rrr(ldx, _c); + case_rrw(ldx, _c); + case_rrr(ldx, _uc); + case_rrw(ldx, _uc); + case_rrr(ldx, _s); + case_rrw(ldx, _s); + case_rrr(ldx, _us); + case_rrw(ldx, _us); + case_rrr(ldx, _i); + case_rrw(ldx, _i); + case jit_code_unldr: + unldr(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi: + unldi(rn(node->u.w), node->v.w, node->w.w); + break; + case jit_code_unldr_u: + unldr_u(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi_u: + unldi_u(rn(node->u.w), node->v.w, node->w.w); + break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); + case_rr(st, _c); + case_wr(st, _c); + case_rr(st, _s); + case_wr(st, _s); + case_rr(st, _i); + case_wr(st, _i); + case_rrr(stx, _c); + case_wrr(stx, _c); + case_rrr(stx, _s); + 
case_wrr(stx, _s); + case_rrr(stx, _i); + case_wrr(stx, _i); + case jit_code_unstr: + unstr(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unsti: + unsti(node->u.w, rn(node->v.w), node->w.w); + break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); + case_rr(hton, _us); + case_rr(hton, _ui); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case jit_code_extr: + extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_extr_u: + extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_depr: + depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_depi: + depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; + case_rr(ext, _c); + case_rr(ext, _uc); + case_rr(ext, _s); + case_rr(ext, _us); + case_rrr(movn,); + case_rrr(movz,); + case_rr(mov,); + case jit_code_movi: + if (node->flag & jit_flag_node) { + temp = node->v.n; + if (temp->code == jit_code_data || + (temp->code == jit_code_label && + (temp->flag & jit_flag_patch))) + movi(rn(node->u.w), temp->u.w); + else { + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + word = movi_p(rn(node->u.w), temp->u.w); + patch(word, node); + } + } + else + movi(rn(node->u.w), node->v.w); + break; + case_rrr(lt,); + case_rrw(lt,); + case_rrr(lt, _u); + case_rrw(lt, _u); + case_rrr(le,); + case_rrw(le,); + case_rrr(le, _u); + case_rrw(le, _u); + case_rrr(eq,); + case_rrw(eq,); + case_rrr(ge,); + case_rrw(ge,); + case_rrr(ge, _u); + case_rrw(ge, _u); + case_rrr(gt,); + case_rrw(gt,); + case_rrr(gt, _u); + case_rrw(gt, 
_u); + case_rrr(ne,); + case_rrw(ne,); + case_brr(blt,); + case_brw(blt,); + case_brr(blt, _u); + case_brw(blt, _u); + case_brr(ble,); + case_brw(ble,); + case_brr(ble, _u); + case_brw(ble, _u); + case_brr(beq,); + case_brw(beq,); + case_brr(bge,); + case_brw(bge,); + case_brr(bge, _u); + case_brw(bge, _u); + case_brr(bgt,); + case_brw(bgt,); + case_brr(bgt, _u); + case_brw(bgt, _u); + case_brr(bne,); + case_brw(bne,); + case_brr(boadd,); + case_brw(boadd,); + case_brr(boadd, _u); + case_brw(boadd, _u); + case_brr(bxadd,); + case_brw(bxadd,); + case_brr(bxadd, _u); + case_brw(bxadd, _u); + case_brr(bosub,); + case_brw(bosub,); + case_brr(bosub, _u); + case_brw(bosub, _u); + case_brr(bxsub,); + case_brw(bxsub,); + case_brr(bxsub, _u); + case_brw(bxsub, _u); + case_brr(bms,); + case_brw(bms,); + case_brr(bmc,); + case_brw(bmc,); + case_rrr(add, _f); + case_rrf(add); + case_rrr(sub, _f); + case_rrf(sub); + case_rrf(rsb); + case_rrr(mul, _f); + case_rrf(mul); + case_rrr(div, _f); + case_rrf(div); + case_rr(abs, _f); + case_rr(neg, _f); + case_rr(sqrt, _f); + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); + case_rr(ext, _f); + case_rr(ld, _f); + case_rw(ld, _f); + case_rrr(ldx, _f); + case_rrw(ldx, _f); + case jit_code_unldr_x: + unldr_x(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi_x: + unldi_x(rn(node->u.w), node->v.w, node->w.w); + break; + case_rr(st, _f); + case_wr(st, _f); + case_rrr(stx, _f); + case_wrr(stx, _f); + case jit_code_unstr_x: + unstr_x(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unsti_x: + unsti_x(node->u.w, rn(node->v.w), node->w.w); + break; + case_rr(mov, _f); + case jit_code_movi_f: + movi_f(rn(node->u.w), node->v.f); + break; + case_rr(ext, _d_f); + case_rrr(lt, _f); + case_rrf(lt); + case_rrr(le, _f); + case_rrf(le); + case_rrr(eq, _f); + case_rrf(eq); + case_rrr(ge, _f); + case_rrf(ge); + case_rrr(gt, _f); + case_rrf(gt); + case_rrr(ne, _f); + case_rrf(ne); + 
case_rrr(unlt, _f); + case_rrf(unlt); + case_rrr(unle, _f); + case_rrf(unle); + case_rrr(uneq, _f); + case_rrf(uneq); + case_rrr(unge, _f); + case_rrf(unge); + case_rrr(ungt, _f); + case_rrf(ungt); + case_rrr(ltgt, _f); + case_rrf(ltgt); + case_rrr(ord, _f); + case_rrf(ord); + case_rrr(unord, _f); + case_rrf(unord); + case_brr(blt, _f); + case_brf(blt); + case_brr(ble, _f); + case_brf(ble); + case_brr(beq, _f); + case_brf(beq); + case_brr(bge, _f); + case_brf(bge); + case_brr(bgt, _f); + case_brf(bgt); + case_brr(bne, _f); + case_brf(bne); + case_brr(bunlt, _f); + case_brf(bunlt); + case_brr(bunle, _f); + case_brf(bunle); + case_brr(buneq, _f); + case_brf(buneq); + case_brr(bunge, _f); + case_brf(bunge); + case_brr(bungt, _f); + case_brf(bungt); + case_brr(bltgt, _f); + case_brf(bltgt); + case_brr(bord, _f); + case_brf(bord); + case_brr(bunord, _f); + case_brf(bunord); + case_rrr(add, _d); + case_rrd(add); + case_rrr(sub, _d); + case_rrd(sub); + case_rrd(rsb); + case_rrr(mul, _d); + case_rrd(mul); + case_rrr(div, _d); + case_rrd(div); + case_rr(abs, _d); + case_rr(neg, _d); + case_rr(sqrt, _d); + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); + case_rr(ext, _d); + case_rr(ld, _d); + case_rw(ld, _d); + case_rrr(ldx, _d); + case_rrw(ldx, _d); + case_rr(st, _d); + case_wr(st, _d); + case_rrr(stx, _d); + case_wrr(stx, _d); + case_rr(mov, _d); + case jit_code_movi_d: + movi_d(rn(node->u.w), node->v.d); + break; + case_rr(ext, _f_d); + case_rrr(lt, _d); + case_rrd(lt); + case_rrr(le, _d); + case_rrd(le); + case_rrr(eq, _d); + case_rrd(eq); + case_rrr(ge, _d); + case_rrd(ge); + case_rrr(gt, _d); + case_rrd(gt); + case_rrr(ne, _d); + case_rrd(ne); + case_rrr(unlt, _d); + case_rrd(unlt); + case_rrr(unle, _d); + case_rrd(unle); + case_rrr(uneq, _d); + case_rrd(uneq); + case_rrr(unge, _d); + case_rrd(unge); + case_rrr(ungt, _d); + case_rrd(ungt); + case_rrr(ltgt, _d); + case_rrd(ltgt); + case_rrr(ord, _d); + case_rrd(ord); + 
case_rrr(unord, _d); + case_rrd(unord); + case_brr(blt, _d); + case_brd(blt); + case_brr(ble, _d); + case_brd(ble); + case_brr(beq, _d); + case_brd(beq); + case_brr(bge, _d); + case_brd(bge); + case_brr(bgt, _d); + case_brd(bgt); + case_brr(bne, _d); + case_brd(bne); + case_brr(bunlt, _d); + case_brd(bunlt); + case_brr(bunle, _d); + case_brd(bunle); + case_brr(buneq, _d); + case_brd(buneq); + case_brr(bunge, _d); + case_brd(bunge); + case_brr(bungt, _d); + case_brd(bungt); + case_brr(bltgt, _d); + case_brd(bltgt); + case_brr(bord, _d); + case_brd(bord); + case_brr(bunord, _d); + case_brd(bunord); + case jit_code_jmpr: + jmpr(rn(node->u.w)); + flush_consts(0); + break; + case jit_code_jmpi: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + jmpi(temp->u.w); + else { + word = jmpi_p(_jit->pc.w); + patch(word, node); + } + } + else + jmpi(node->u.w); + flush_consts(0); + break; + case jit_code_callr: + callr(rn(node->u.w)); + break; + case jit_code_calli: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + calli(temp->u.w); + else { + word = calli_p(_jit->pc.w); + patch(word, node); + } + } + else + calli(node->u.w); + break; + case jit_code_prolog: + _jitc->function = _jitc->functions.ptr + node->w.w; + undo.node = node; + undo.word = _jit->pc.w; +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif + undo.data = _jitc->consts.data; + undo.const_offset = _jitc->consts.offset; + undo.patch_offset = _jitc->patches.offset; +#if DISASSEMBLER + if (_jitc->data_info.ptr) + undo.info_offset = _jitc->data_info.offset; +#endif + restart_function: + _jitc->again = 0; + prolog(node); + break; + case jit_code_epilog: + assert(_jitc->function == _jitc->functions.ptr + node->w.w); + if (_jitc->again) { + for (temp = undo.node->next; + temp != node; 
temp = temp->next) { + if (temp->code == jit_code_label || + temp->code == jit_code_epilog) + temp->flag &= ~jit_flag_patch; + } + temp->flag &= ~jit_flag_patch; + node = undo.node; + _jit->pc.w = undo.word; +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif + invalidate_consts(); + _jitc->consts.data = undo.data; + _jitc->consts.offset = undo.const_offset; + _jitc->patches.offset = undo.patch_offset; +#if DISASSEMBLER + if (_jitc->data_info.ptr) + _jitc->data_info.offset = undo.info_offset; +#endif + goto restart_function; + } + /* remember label is defined */ + node->flag |= jit_flag_patch; + node->u.w = _jit->pc.w; + epilog(node); + _jitc->function = NULL; + flush_consts(0); + break; + case jit_code_movr_w_f: + movr_w_f(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movr_f_w: + movr_f_w(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movi_f_w: + movi_f_w(rn(node->u.w), node->v.f); + break; + case jit_code_movi_w_f: + movi_w_f(rn(node->u.w), node->v.w); + break; + case jit_code_movr_ww_d: + movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w)); + break; + case jit_code_movr_d_ww: + movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w)); + break; + case jit_code_movi_d_ww: + movi_d_ww(rn(node->u.w), rn(node->v.w), node->w.d); + break; + case jit_code_movi_ww_d: + movi_ww_d(rn(node->u.w), node->v.w, node->w.w); + break; + case jit_code_va_start: + vastart(rn(node->u.w)); + break; + case jit_code_va_arg: + vaarg(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_va_arg_d: + vaarg_d(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_live: case jit_code_ellipsis: + case jit_code_va_push: + case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: + case jit_code_arg_f: case jit_code_arg_d: + case jit_code_va_end: + case jit_code_ret: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case 
jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_f: case jit_code_reti_f: + case jit_code_retr_d: case jit_code_reti_d: + case jit_code_getarg_c: case jit_code_getarg_uc: + case jit_code_getarg_s: case jit_code_getarg_us: + case jit_code_getarg_i: case jit_code_getarg_ui: + case jit_code_getarg_l: + case jit_code_getarg_f: case jit_code_getarg_d: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_f: case jit_code_putargi_f: + case jit_code_putargr_d: case jit_code_putargi_d: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_f: case jit_code_pushargi_f: + case jit_code_pushargr_d: case jit_code_pushargi_d: + case jit_code_retval_c: case jit_code_retval_uc: + case jit_code_retval_s: case jit_code_retval_us: + case jit_code_retval_i: + case jit_code_retval_f: case jit_code_retval_d: + case jit_code_prepare: + case jit_code_finishr: case jit_code_finishi: + break; + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_negi_f: case jit_code_absi_f: + case jit_code_sqrti_f: case jit_code_negi_d: + case jit_code_absi_d: case jit_code_sqrti_d: + break; + case jit_code_negi: + negi(rn(node->u.w), node->v.w); + break; + case jit_code_comi: + comi(rn(node->u.w), node->v.w); + break; + case jit_code_exti_c: + exti_c(rn(node->u.w), node->v.w); + break; + case jit_code_exti_uc: + 
exti_uc(rn(node->u.w), node->v.w); + break; + case jit_code_exti_s: + exti_s(rn(node->u.w), node->v.w); + break; + case jit_code_exti_us: + exti_us(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_us: + bswapi_us(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_ui: + bswapi_ui(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_us: + htoni_us(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_ui: + htoni_ui(rn(node->u.w), node->v.w); + break; + case jit_code_cloi: + cloi(rn(node->u.w), node->v.w); + break; + case jit_code_clzi: + clzi(rn(node->u.w), node->v.w); + break; + case jit_code_ctoi: + ctoi(rn(node->u.w), node->v.w); + break; + case jit_code_ctzi: + ctzi(rn(node->u.w), node->v.w); + break; + case jit_code_rbiti: + rbiti(rn(node->u.w), node->v.w); + break; + case jit_code_popcnti: + popcnti(rn(node->u.w), node->v.w); + break; + case jit_code_exti: + exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; + case jit_code_exti_u: + exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; + default: + printf("ABORT MISSION (%i)\n", node->code); + abort(); + } + jit_regarg_clr(node, value); + assert(_jitc->regarg == 0); + assert(_jitc->synth == 0); + /* update register live state */ + jit_reglive(node); + + _jitc->no_flag = !(node->flag & jit_flag_patch); + + if (_jitc->consts.length && + (jit_uword_t)_jit->pc.uc - (jit_uword_t)_jitc->consts.patches[0] >= 900) { + /* Maximum displacement for mov.l is +1020 bytes. If we're already +900 bytes + * since the first mov.l, force a flush. 
*/ + + if (node->next && + node->next->code != jit_code_jmpi && + node->next->code != jit_code_jmpr && + node->next->code != jit_code_epilog) { + /* insert a jump, flush constants and continue */ + word = _jit->pc.w; + BRA(0); + NOP(); + flush_consts(1); + patch_at(word, _jit->pc.w); + } + } + } +#undef case_brw +#undef case_brr +#undef case_wrr +#undef case_rrw +#undef case_rrr +#undef case_wr +#undef case_rw +#undef case_rr + + flush_consts(1); + + for (offset = 0; offset < _jitc->patches.offset; offset++) { + node = _jitc->patches.ptr[offset].node; + word = _jitc->patches.ptr[offset].inst; + value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; + patch_at(word, value); + } + + jit_flush(_jit->code.ptr, _jit->pc.uc); + + return (_jit->code.ptr); +} + +#define CODE 1 +# include "jit_rewind.c" +# include "jit_sh-cpu.c" +# include "jit_sh-fpu.c" +# include "jit_fallback.c" +#undef CODE + +void +jit_flush(void *fptr, void *tptr) +{ +#if defined(__linux__) + jit_uword_t i, f, t, s; + + s = sysconf(_SC_PAGE_SIZE); + f = (jit_uword_t)fptr & -s; + t = (((jit_uword_t)tptr) + s - 1) & -s; + for (i = f; i < t; i += s) + __clear_cache((void *)i, (void *)(i + s)); +#endif +} + +void +_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + ldxi(rn(r0), rn(r1), i0); +} + +void +_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + stxi(i0, rn(r0), rn(r1)); +} + +void +_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* No FPU support */ +} + +void +_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + /* No FPU support */ +} + +static void +_load_const(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_word_t i0) +{ + jit_word_t w; + jit_word_t d; + jit_word_t base; + jit_int32_t *data; + jit_int32_t size; + jit_int32_t offset; + + _jitc->consts.patches[_jitc->consts.offset++] = _jit->pc.w; + /* positive forward offset */ + LDPL(r0, 0); + 
+ if (!uniq) { + /* search already requested values */ + for (offset = 0; offset < _jitc->consts.length; offset++) { + if (_jitc->consts.values[offset] == i0) { + _jitc->consts.patches[_jitc->consts.offset++] = offset; + return; + } + } + } + +#if DEBUG + /* cannot run out of space because of limited range + * but assert anyway to catch logic errors */ + assert(_jitc->consts.length < 1024); + assert(_jitc->consts.offset < 2048); +#endif + _jitc->consts.patches[_jitc->consts.offset++] = _jitc->consts.length; + _jitc->consts.values[_jitc->consts.length++] = i0; +} + +static void +_load_const_f(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_float32_t f0) +{ + jit_word_t w; + jit_word_t d; + jit_word_t base; + jit_int32_t *data; + jit_int32_t size; + jit_int32_t offset; + union fl32 { + jit_int32_t i; + jit_float32_t f; + }; + jit_uint32_t i0 = ((union fl32)f0).i; + + _jitc->consts.patches[_jitc->consts.offset++] = _jit->pc.w; + /* positive forward offset */ + MOVA(0); + LDF(r0, _R0); + + if (!uniq) { + /* search already requested values */ + for (offset = 0; offset < _jitc->consts.length; offset++) { + if (_jitc->consts.values[offset] == i0) { + _jitc->consts.patches[_jitc->consts.offset++] = offset; + return; + } + } + } + +#if DEBUG + /* cannot run out of space because of limited range + * but assert anyway to catch logic errors */ + assert(_jitc->consts.length < 1024); + assert(_jitc->consts.offset < 2048); +#endif + _jitc->consts.patches[_jitc->consts.offset++] = _jitc->consts.length; + _jitc->consts.values[_jitc->consts.length++] = i0; +} + +static void +_flush_consts(jit_state_t *_jit, jit_bool_t force) +{ + jit_word_t word; + jit_int32_t offset; + + /* if no forward constants */ + if (!_jitc->consts.length) + return; + + word = _jit->code.length - (_jit->pc.uc - _jit->code.ptr) + - (_jitc->consts.length << 1); + if (!force && word < 1024) + return; + + /* Align to 32 bits */ + if (_jit->pc.w & 0x3) + NOP(); + + word = _jit->pc.w; + _jitc->consts.data = 
_jit->pc.uc; + _jitc->consts.size = _jitc->consts.length << 2; + /* FIXME check will not overrun, otherwise, need to reallocate + * code buffer and start over */ + jit_memcpy(_jitc->consts.data, _jitc->consts.values, _jitc->consts.size); + _jit->pc.w += _jitc->consts.size; + +#if DISASSEMBLER + if (_jitc->data_info.ptr) { + if (_jitc->data_info.offset >= _jitc->data_info.length) { + jit_realloc((jit_pointer_t *)&_jitc->data_info.ptr, + _jitc->data_info.length * sizeof(jit_data_info_t), + (_jitc->data_info.length + 1024) * + sizeof(jit_data_info_t)); + _jitc->data_info.length += 1024; + } + _jitc->data_info.ptr[_jitc->data_info.offset].code = word; + _jitc->data_info.ptr[_jitc->data_info.offset].length = _jitc->consts.size; + ++_jitc->data_info.offset; + } +#endif + + for (offset = 0; offset < _jitc->consts.offset; offset += 2) + patch_at(_jitc->consts.patches[offset], + word + (_jitc->consts.patches[offset + 1] << 2)); + _jitc->consts.length = _jitc->consts.offset = 0; +} + +/* to be called if needing to start over a function */ +static void +_invalidate_consts(jit_state_t *_jit) +{ + /* if no forward constants */ + if (_jitc->consts.length) + _jitc->consts.length = _jitc->consts.offset = 0; +} + +static void +_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) +{ + jit_int32_t flag; + + assert(node->flag & jit_flag_node); + if (node->code == jit_code_movi) + flag = node->v.n->flag; + else + flag = node->u.n->flag; + assert(!(flag & jit_flag_patch)); + if (_jitc->patches.offset >= _jitc->patches.length) { + jit_realloc((jit_pointer_t *)&_jitc->patches.ptr, + _jitc->patches.length * sizeof(jit_patch_t), + (_jitc->patches.length + 1024) * sizeof(jit_patch_t)); + _jitc->patches.length += 1024; + } + _jitc->patches.ptr[_jitc->patches.offset].inst = instr; + _jitc->patches.ptr[_jitc->patches.offset].node = node; + ++_jitc->patches.offset; +} + +static jit_node_t * +_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code) +{ + jit_int32_t offset; + + 
if (jit_arg_reg_p(_jitc->function->self.argi)) + offset = _jitc->function->self.argi++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += STACK_SLOT; + } + + if (node == (jit_node_t *)0) + node = jit_new_node(code); + else + link_node(node); + + node->u.w = offset; + node->v.w = ++_jitc->function->self.argn; + jit_link_prolog(); + + return (node); +} + +static jit_node_t * +_jit_make_arg_f(jit_state_t *_jit, jit_node_t *node) +{ + jit_int32_t offset; + + if (jit_arg_f_reg_p(_jitc->function->self.argf)) { + offset = _jitc->function->self.argf++; + } + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += STACK_SLOT; + } + if (node == (jit_node_t *)0) + node = jit_new_node(jit_code_arg_f); + else + link_node(node); + node->u.w = offset; + node->v.w = ++_jitc->function->self.argn; + jit_link_prolog(); + return (node); +} + +static jit_node_t * +_jit_make_arg_d(jit_state_t *_jit, jit_node_t *node) +{ + jit_int32_t offset; + if (jit_arg_f_reg_p(_jitc->function->self.argf)) { + offset = (_jitc->function->self.argf + 1) & ~1; + _jitc->function->self.argf = offset + 2; + } + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += STACK_SLOT * 2; + } + if (node == (jit_node_t *)0) + node = jit_new_node(jit_code_arg_d); + else + link_node(node); + node->u.w = offset; + node->v.w = ++_jitc->function->self.argn; + jit_link_prolog(); + return (node); +} + +jit_node_t * +_jit_arg(jit_state_t *_jit, jit_code_t code) +{ + assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); + return (jit_make_arg((jit_node_t*)0, code)); +} + +void +_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_c); + jit_inc_synth_wp(getarg_c, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_c(u, JIT_RA0 + v->u.w); + else + jit_ldxi_c(u, JIT_FP, v->u.w + C_DISP); + jit_dec_synth(); +} + +void +_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t 
*v) +{ + assert_arg_type(v->code, jit_code_arg_c); + jit_inc_synth_wp(getarg_uc, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_uc(u, JIT_RA0 + v->u.w); + else + jit_ldxi_uc(u, JIT_FP, v->u.w + C_DISP); + jit_dec_synth(); +} + +void +_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_s); + jit_inc_synth_wp(getarg_s, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_s(u, JIT_RA0 + v->u.w); + else + jit_ldxi_s(u, JIT_FP, v->u.w + S_DISP); + jit_dec_synth(); +} + +void +_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_s); + jit_inc_synth_wp(getarg_us, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_us(u, JIT_RA0 + v->u.w); + else + jit_ldxi_us(u, JIT_FP, v->u.w + S_DISP); + jit_dec_synth(); +} + +void +_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_i); + jit_inc_synth_wp(getarg_i, u, v); + if (jit_arg_reg_p(v->u.w)) { + jit_movr(u, JIT_RA0 + v->u.w); + } + else + jit_ldxi_i(u, JIT_FP, v->u.w + I_DISP); + jit_dec_synth(); +} + +void +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) +{ + jit_code_inc_synth_w(code, u); + jit_link_prepare(); + assert(_jitc->function); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr(JIT_RA0 + _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += STACK_SLOT; + } + jit_dec_synth(); +} + +void +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) +{ + jit_int32_t regno; + assert(_jitc->function); + jit_code_inc_synth_w(code, u); + jit_link_prepare(); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi(JIT_RA0 + _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + 
_jitc->function->call.size += STACK_SLOT; + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_finishr(jit_state_t *_jit, jit_int32_t r0) +{ + jit_node_t *call; + assert(_jitc->function); + jit_inc_synth_w(finishr, r0); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + call = jit_callr(r0); + call->v.w = _jitc->function->self.argi; + call->w.w = _jitc->function->self.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + jit_dec_synth(); +} + +jit_node_t * +_jit_finishi(jit_state_t *_jit, jit_pointer_t i0) +{ + jit_node_t *call; + assert(_jitc->function); + jit_inc_synth_w(finishi, (jit_word_t)i0); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + call = jit_calli(i0); + call->v.w = _jitc->function->call.argi; + call->w.w = _jitc->function->call.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + jit_dec_synth(); + return (call); +} + +void +_jit_retval_c(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_c(r0, JIT_RET); +} + +void +_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_uc(r0, JIT_RET); +} + +void +_jit_retval_s(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_s(r0, JIT_RET); +} + +void +_jit_retval_us(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_us(r0, JIT_RET); +} + +void +_jit_retval_i(jit_state_t *_jit, jit_int32_t r0) +{ + jit_movr(r0, JIT_RET); +} + +void +_jit_ellipsis(jit_state_t *_jit) +{ + jit_inc_synth(ellipsis); + if (_jitc->prepare) { + jit_link_prepare(); + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + jit_link_prolog(); + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + _jitc->function->vagp = 
_jitc->function->self.argi; + _jitc->function->vafp = _jitc->function->self.argf; + _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t) + /* +1 to ensure 8-byte alignment */ + + (NUM_WORD_ARGS + NUM_FLOAT_ARGS + 1) * 4); + } + jit_dec_synth(); +} + +void +_jit_va_push(jit_state_t *_jit, jit_int32_t u) +{ + jit_int32_t i, reg; + jit_inc_synth_w(va_push, u); + + reg = jit_get_reg(jit_class_gpr); + + for (i = 0; i < 5; i++) { + jit_ldxi(reg, u, i * 4); + jit_stxi(_jitc->function->call.size + i * 4, JIT_SP, reg); + } + + jit_unget_reg(reg); + + _jitc->function->call.size += 5 * 4; + + jit_dec_synth(); +} + +jit_bool_t +_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) +{ + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) + return (jit_arg_reg_p(u->u.w)); + + assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); + return (jit_arg_f_reg_p(u->u.w)); +} + +jit_node_t * +_jit_arg_f(jit_state_t *_jit) +{ + assert(_jitc->function); + return (jit_make_arg_f((jit_node_t*)0)); +} + +jit_node_t * +_jit_arg_d(jit_state_t *_jit) +{ + assert(_jitc->function); + return (jit_make_arg_d((jit_node_t*)0)); +} + +void +_jit_retval_f(jit_state_t *_jit, jit_int32_t r0) +{ + jit_movr_f(r0, JIT_FRET); +} + +void +_jit_retval_d(jit_state_t *_jit, jit_int32_t r0) +{ + jit_movr_d(r0, JIT_FRET); +} + +void +_jit_retr_f(jit_state_t *_jit, jit_int32_t u) +{ + jit_inc_synth_w(retr_f, u); + jit_movr_f(JIT_FRET, u); + jit_live(JIT_FRET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_inc_synth_w(reti_f, u); + jit_movi_f(JIT_FRET, u); + jit_live(JIT_FRET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_retr_d(jit_state_t *_jit, jit_int32_t u) +{ + jit_inc_synth_w(retr_d, u); + jit_movr_d(JIT_FRET, u); + jit_live(JIT_FRET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_inc_synth_w(reti_d, u); + jit_movi_d(JIT_FRET, u); + jit_live(JIT_FRET); + jit_ret(); + 
jit_dec_synth(); +} + +void +_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) +{ + jit_int32_t regno; + + jit_inc_synth_w(pushargr_f, u); + jit_link_prepare(); + assert(_jitc->function); + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { + jit_movr_f(JIT_FA0 + (_jitc->function->call.argf ^ fpr_args_inverted()), u); + ++_jitc->function->call.argf; + } + else { + jit_stxi_f(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += STACK_SLOT; + } + jit_dec_synth(); +} + +void +_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_int32_t regno; + + jit_inc_synth_w(pushargi_f, u); + jit_link_prepare(); + assert(_jitc->function); + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { + jit_movi_f(JIT_FA0 + (_jitc->function->call.argf ^ fpr_args_inverted()), u); + ++_jitc->function->call.argf; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); + _jitc->function->call.size += STACK_SLOT; + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) +{ + jit_int32_t regno; + jit_inc_synth_w(pushargr_d, u); + jit_link_prepare(); + assert(_jitc->function); + + regno = (_jitc->function->call.argf + 1) & ~1; + if (jit_arg_f_reg_p(regno)) { + jit_movr_d(JIT_FA0 + regno, u); + _jitc->function->call.argf = regno + 2; + } + else { + jit_stxi_d(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += STACK_SLOT * 2; + } + jit_dec_synth(); +} + +void +_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_int32_t regno; + + jit_inc_synth_w(pushargi_d, u); + jit_link_prepare(); + assert(_jitc->function); + + regno = (_jitc->function->call.argf + 1) & ~1; + if (jit_arg_f_reg_p(regno)) { + jit_movi_d(JIT_FA0 + regno, u); + _jitc->function->call.argf = regno + 2; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); + 
_jitc->function->call.size += STACK_SLOT * 2; + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(putargr, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_f(JIT_FA0 + (v->u.w ^ fpr_args_inverted()), u); + else + jit_stxi_f(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) +{ + jit_int32_t regno; + + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(putargi, u, v); + if (jit_arg_f_reg_p(v->u.w)) { + jit_movi_f(JIT_FA0 + (v->u.w ^ fpr_args_inverted()), u); + } else { + regno = jit_get_reg(jit_class_fpr); + + jit_movi_f(regno, u); + jit_stxi_f(v->u.w, JIT_FP, regno); + + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(putargr, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_d(JIT_FA0 + v->u.w, u); + else + jit_stxi_d(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) +{ + jit_int32_t regno; + + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(putargi, u, v); + if (jit_arg_f_reg_p(v->u.w)) { + jit_movi_d(JIT_FA0 + v->u.w, u); + } else { + regno = jit_get_reg(jit_class_fpr); + + jit_movi_d(regno, u); + jit_stxi_d(v->u.w, JIT_FP, regno); + + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(getarg_d, u, v); + + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_d(u, JIT_FA0 + v->u.w); + else + jit_ldxi_d(u, JIT_FP, v->u.w); + + jit_dec_synth(); +} + +void +_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(getarg_f, u, v); + + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_f(u, JIT_FA0 + 
(v->u.w ^ fpr_args_inverted())); + else + jit_ldxi_f(u, JIT_FP, v->u.w); + + jit_dec_synth(); +} diff --git a/lib/jit_size.c b/lib/jit_size.c index 143a5d9..24a5c95 100644 --- a/lib/jit_size.c +++ b/lib/jit_size.c @@ -54,6 +54,8 @@ static jit_int16_t _szs[jit_code_last_code] = { # include "jit_riscv-sz.c" # elif defined(__loongarch__) # include "jit_loongarch-sz.c" +# elif defined(__sh__) +# include "jit_sh-sz.c" # endif #endif }; diff --git a/lib/lightning.c b/lib/lightning.c index 83917ae..643c5f1 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -4598,6 +4598,8 @@ static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert); # include "jit_riscv.c" #elif defined(__loongarch__) # include "jit_loongarch.c" +#elif defined(__sh__) +# include "jit_sh.c" #endif static maybe_unused void |