lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat
-rw-r--r--ChangeLog 60
-rw-r--r--check/Makefile.am 24
-rw-r--r--check/alu.inc 283
-rw-r--r--check/alu_add.ok 1
-rw-r--r--check/alu_add.tst 46
-rw-r--r--check/alu_and.ok 1
-rw-r--r--check/alu_and.tst 35
-rw-r--r--check/alu_com.ok 1
-rw-r--r--check/alu_com.tst 32
-rw-r--r--check/alu_div.ok 1
-rw-r--r--check/alu_div.tst 82
-rw-r--r--check/alu_lsh.ok 1
-rw-r--r--check/alu_lsh.tst 56
-rw-r--r--check/alu_mul.ok 1
-rw-r--r--check/alu_mul.tst 58
-rw-r--r--check/alu_neg.ok 1
-rw-r--r--check/alu_neg.tst 41
-rw-r--r--check/alu_or.ok 1
-rw-r--r--check/alu_or.tst 35
-rw-r--r--check/alu_rem.ok 1
-rw-r--r--check/alu_rem.tst 75
-rw-r--r--check/alu_rsh.ok 1
-rw-r--r--check/alu_rsh.tst 84
-rw-r--r--check/alu_sub.ok 1
-rw-r--r--check/alu_sub.tst 48
-rw-r--r--check/alu_xor.ok 1
-rw-r--r--check/alu_xor.tst 35
-rw-r--r--check/alux_add.ok 1
-rw-r--r--check/alux_add.tst 48
-rw-r--r--check/alux_sub.ok 1
-rw-r--r--check/alux_sub.tst 48
-rw-r--r--check/branch.ok 1
-rw-r--r--check/branch.tst 562
-rw-r--r--include/lightning/jit_private.h 6
-rw-r--r--lib/jit_arm-cpu.c 10
-rw-r--r--lib/jit_arm-swf.c 4
-rw-r--r--lib/jit_arm.c 12
-rw-r--r--lib/jit_x86-cpu.c 111
-rw-r--r--lib/lightning.c 23
39 files changed, 1774 insertions, 59 deletions
diff --git a/ChangeLog b/ChangeLog
index 6718b76..71a1907 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,63 @@
+2012-12-09 Paulo Andrade <pcpa@gnu.org>
+
+ * check/alu.inc, check/alu_add.ok, check/alu_add.tst,
+ check/alu_and.ok, check/alu_and.tst, check/alu_com.ok,
+ check/alu_com.tst, check/alu_div.ok, check/alu_div.tst,
+ check/alu_lsh.ok, check/alu_lsh.tst, check/alu_mul.ok,
+ check/alu_mul.tst, check/alu_neg.ok, check/alu_neg.tst,
+ check/alu_or.ok, check/alu_or.tst, check/alu_rem.ok,
+ check/alu_rem.tst, check/alu_rsh.ok, check/alu_rsh.tst,
+ check/alu_sub.ok, check/alu_sub.tst, check/alu_xor.ok,
+ check/alu_xor.tst, check/alux_add.ok, check/alux_add.tst,
+ check/alux_sub.ok, check/alux_sub.tst, check/branch.ok,
+ check/branch.tst: New test cases for arithmetic and branch
+ tests.
+
+ * check/Makefile.am: Update for new test cases.
+
+ * include/lightning/jit_private.h: Make the jit_reg_free_p
+ macro shared by all backends. It was previously added for the
+ arm backend, but is useful in the x86_64 backend when checking
+ the state of a "special purpose register".
+ Also add the new jit_class_named register class, that must be
+ or'ed with the register value if calling jit_get_reg expecting
+ a specific value, because the specific register value may be
+ zero, that previously was treated as no register requested.
+
+ * lib/jit_arm-cpu.c: Correct argument order for T2_MVN.
+
+ * lib/jit_arm-swf.c: Call the proper function for double
+ divide. The "software float" implementation just calls
+ libgcc functions.
+
+ * lib/jit_arm.c: Return float/double values in the float
+ register if using the hard float ABI.
+
+ * lib/jit_x86-cpu.c: Change the can_sign_extend_int_p macro
+ to not include -0x80000000L, because there is code that
+ "abuses" it and thinks it can negate the immediate value
+ after calling that macro.
+ Correct implementation of jit_subi that had a wrong code
+ patch logic doing subtraction with reversed arguments.
+ Correct REX prefix calculation in the jit_muli implementation.
+ Correct logic to get/unget %*ax and %*dx registers in divremr
+ and divremi.
+ Correct divremi that was using the symbolic, unique %*ax
+ value in one place (not using the _REGNO name suffix).
+ Correct cut&paste error causing it to use "xor" instead of
+ "or" in one code path of the jit_ori implementation.
+ Correct several flaws when clobbering registers and/or when
+ one of the arguments was %*cx in the rotshr wrapper function
+ implementing most shift operations.
+
+ * lib/lightning.c: No longer expect that the backend be smart
+ enough to know what to do when asking for a named register
+ if that register is already an argument or is live. It fails
+ if the register is an argument or, if the register is live,
+ fails if it cannot be spilled.
+ No longer incorrectly assume that eqr_{f,d} and ltgr_{f,d} are
+ safe for inverting value tests in jump thread optimization.
+
2012-12-05 Paulo Andrade <pcpa@gnu.org>
* check/Makefile.am, check/cvt.ok, check/cvt.tst: Add new
diff --git a/check/Makefile.am b/check/Makefile.am
index ae44dea..4ecc243 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -41,6 +41,22 @@ EXTRA_DIST = \
ldstxr-c.tst ldstxr-c.ok \
ldstxi-c.tst ldstxi-c.ok \
cvt.tst cvt.ok \
+ branch.tst branch.ok \
+ alu.inc \
+ alu_add.tst alu_add.ok \
+ alux_add.tst alux_add.ok \
+ alu_sub.tst alu_sub.ok \
+ alux_sub.tst alux_sub.ok \
+ alu_mul.tst alu_mul.ok \
+ alu_div.tst alu_div.ok \
+ alu_rem.tst alu_rem.ok \
+ alu_and.tst alu_and.ok \
+ alu_or.tst alu_or.ok \
+ alu_xor.tst alu_xor.ok \
+ alu_lsh.tst alu_lsh.ok \
+ alu_rsh.tst alu_rsh.ok \
+ alu_com.tst alu_com.ok \
+ alu_neg.tst alu_neg.ok \
check.sh run-test \
all.tst
@@ -49,7 +65,13 @@ TESTS = 3to2 add allocai \
ldstr ldsti \
ldstxr ldstxi \
ldstr-c ldstxr-c ldstxi-c \
- cvt
+ cvt branch \
+ alu_add alux_add \
+ alu_sub alux_sub \
+ alu_mul alu_div alu_rem \
+ alu_and alu_or alu_xor \
+ alu_lsh alu_rsh \
+ alu_com alu_neg
CLEANFILES = $(TESTS)
diff --git a/check/alu.inc b/check/alu.inc
new file mode 100644
index 0000000..7edf6b2
--- /dev/null
+++ b/check/alu.inc
@@ -0,0 +1,283 @@
+.data 8
+ok:
+.c "ok\n"
+
+/* 3 operand */
+
+/* reg0 = reg1 op reg2 */
+#define ALUR(N, T, OP, I0, I1, V, R0, R1, R2) \
+ movi %R1 I0 \
+ movi %R2 I1 \
+ OP##r##T %R0 %R1 %R2 \
+ beqi OP##T##N##r_##R0##R1##R2 %R0 V \
+ calli @abort \
+OP##T##N##r_##R0##R1##R2:
+
+/* reg0 = reg1 op im */
+#define ALUI(N, T, OP, I0, I1, V, R0, R1, R2) \
+ movi %R1 I0 \
+ movi %R2 V \
+ OP##i##T %R0 %R1 I1 \
+ beqr OP##T##N##i_##R0##R1##R2 %R0 %R2 \
+ calli @abort \
+OP##T##N##i_##R0##R1##R2:
+
+/* reg0 = reg0 op reg1 */
+#define ALUR0(N, T, OP, I0, I1, V, R0, R1, R2) \
+ movi %R0 I0 \
+ movi %R1 I1 \
+ movi %R2 V \
+ OP##r##T %R0 %R0 %R1 \
+ beqr OP##T##N##r_0##R0##R1##R2 %R0 %R2 \
+ calli @abort \
+OP##T##N##r_0##R0##R1##R2:
+
+/* reg0 = reg1 op reg0 */
+#define ALUR1(N, T, OP, I0, I1, V, R0, R1, R2) \
+ movi %R0 I1 \
+ movi %R1 I0 \
+ movi %R2 V \
+ OP##r##T %R0 %R1 %R0 \
+ beqr OP##T##N##r_1##R0##R1##R2 %R0 %R2 \
+ calli @abort \
+OP##T##N##r_1##R0##R1##R2:
+
+/* reg0 = reg0 op im */
+#define ALUI0(N, T, OP, I0, I1, V, R0, R1, R2) \
+ movi %R0 I0 \
+ movi %R1 V \
+ OP##i##T %R0 %R0 I1 \
+ beqr OP##T##N##i_0##R0##R1##R2 %R0 %R1 \
+ calli @abort \
+OP##T##N##i_0##R0##R1##R2:
+
+#define ALU3(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALUR(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALUI(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALUR0(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALUR1(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALUI0(N, T, OP, I0, I1, V, R0, R1, R2)
+
+#define ALU2(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALU3(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALU3(N, T, OP, I0, I1, V, R0, R2, R1)
+
+#define ALU1(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALU2(N, T, OP, I0, I1, V, R0, R1, R2) \
+ ALU2(N, T, OP, I0, I1, V, R1, R0, R2) \
+ ALU2(N, T, OP, I0, I1, V, R2, R1, R0)
+
+#define ALU(N, T, OP, I0, I1, V) \
+ ALU1(N, T, OP, I0, I1, V, v0, v1, v2) \
+ ALU1(N, T, OP, I0, I1, V, v0, v1, r0) \
+ ALU1(N, T, OP, I0, I1, V, v0, v1, r1) \
+ ALU1(N, T, OP, I0, I1, V, v0, v1, r2) \
+ ALU1(N, T, OP, I0, I1, V, v1, v2, r1) \
+ ALU1(N, T, OP, I0, I1, V, v1, v2, r2) \
+ ALU1(N, T, OP, I0, I1, V, v2, r0, r1) \
+ ALU1(N, T, OP, I0, I1, V, v2, r0, r2) \
+ ALU1(N, T, OP, I0, I1, V, r0, r1, r2)
+
+/* 3 carry set/propagate */
+
+/*
+ * r0 = i0
+ * r1 = i1
+ * r2 = 0
+ * r0 = r0 opc r1 <only want carry>
+ * r2 = r2 opx r2 <r2 must match v>
+ */
+#define ALUXII(N, OP, I0, I1, V, R0, R1, R2) \
+ movi %R0 I0 \
+ movi %R2 0 \
+ OP##ci %R0 %R0 I1 \
+ OP##xi %R2 %R2 0 \
+ beqi OP##N##ii##R0##R1##R2 %R2 V \
+ calli @abort \
+OP##N##ii##R0##R1##R2:
+
+#define ALUXIR(N, OP, I0, I1, V, R0, R1, R2) \
+ movi %R0 I0 \
+ movi %R2 0 \
+ OP##ci %R0 %R0 I1 \
+ OP##xr %R2 %R2 %R2 \
+ beqi OP##N##ir##R0##R1##R2 %R2 V \
+ calli @abort \
+OP##N##ir##R0##R1##R2:
+
+#define ALUXRI(N, OP, I0, I1, V, R0, R1, R2) \
+ movi %R0 I0 \
+ movi %R1 I1 \
+ movi %R2 0 \
+ OP##cr %R0 %R0 %R1 \
+ OP##xi %R2 %R2 0 \
+ beqi OP##N##ri##R0##R1##R2 %R2 V \
+ calli @abort \
+OP##N##ri##R0##R1##R2:
+
+#define ALUXRR(N, OP, I0, I1, V, R0, R1, R2) \
+ movi %R0 I0 \
+ movi %R1 I1 \
+ movi %R2 0 \
+ OP##cr %R0 %R0 %R1 \
+ OP##xr %R2 %R2 %R2 \
+ beqi OP##N##rr##R0##R1##R2 %R2 V \
+ calli @abort \
+OP##N##rr##R0##R1##R2:
+
+#define ALUX2(N, OP, I0, I1, V, R0, R1, R2) \
+ ALUXII(N, OP, I0, I1, V, R0, R1, R2) \
+ ALUXIR(N, OP, I0, I1, V, R0, R1, R2) \
+ ALUXRI(N, OP, I0, I1, V, R0, R1, R2) \
+ ALUXRR(N, OP, I0, I1, V, R0, R1, R2)
+
+#define ALUX1(N, OP, I0, I1, V, R0, R1, R2) \
+ ALUX2(N, OP, I0, I1, V, R0, R1, R2) \
+ ALUX2(N, OP, I0, I1, V, R0, R2, R1)
+
+#define ALUX0(N, OP, I0, I1, V, R0, R1, R2) \
+ ALUX1(N, OP, I0, I1, V, R0, R1, R2) \
+ ALUX1(N, OP, I0, I1, V, R1, R0, R2) \
+ ALUX1(N, OP, I0, I1, V, R2, R1, R0)
+
+#define ALUX(N, OP, I0, I1, V) \
+ ALUX0(N, OP, I0, I1, V, v0, v1, v2) \
+ ALUX0(N, OP, I0, I1, V, v0, v1, r0) \
+ ALUX0(N, OP, I0, I1, V, v0, v1, r1) \
+ ALUX0(N, OP, I0, I1, V, v0, v1, r2) \
+ ALUX0(N, OP, I0, I1, V, v1, v2, r0) \
+ ALUX0(N, OP, I0, I1, V, v1, v2, r1) \
+ ALUX0(N, OP, I0, I1, V, v1, v2, r2) \
+ ALUX0(N, OP, I0, I1, V, v2, r0, r1) \
+ ALUX0(N, OP, I0, I1, V, v2, r0, r2) \
+ ALUX0(N, OP, I0, I1, V, r0, r1, r2)
+
+/* unary int */
+
+#define UNR(N, OP, I, V, R0, R1) \
+ movi %R1 I \
+ OP##r %R0 %R1 \
+ beqi OP##N##R0##R1 %R0 V \
+ calli @abort \
+OP##N##R0##R1:
+
+#define UNRC(N, OP, I, V, R0, R1) \
+ movi %R0 I \
+ OP##r %R0 %R0 \
+ beqi OP##N##c##R0##R1 %R0 V \
+ calli @abort \
+OP##N##c##R0##R1:
+
+#define UN2(N, OP, I, V, R0, R1) \
+ UNR(N, OP, I, V, R0, R1) \
+ UNRC(N, OP, I, V, R0, R1)
+
+#define UN1(N, OP, I, V, R0, R1) \
+ UN2(N, OP, I, V, R0, R1) \
+ UN2(N, OP, I, V, R1, R0)
+
+#define UN(N, OP, I, V) \
+ UN1(N, OP, I, V, v0, v1) \
+ UN1(N, OP, I, V, v0, v2) \
+ UN1(N, OP, I, V, v0, r0) \
+ UN1(N, OP, I, V, v0, r1) \
+ UN1(N, OP, I, V, v0, r2) \
+ UN1(N, OP, I, V, v1, v2) \
+ UN1(N, OP, I, V, v1, r0) \
+ UN1(N, OP, I, V, v1, r1) \
+ UN1(N, OP, I, V, v1, r2) \
+ UN1(N, OP, I, V, v2, r0) \
+ UN1(N, OP, I, V, v2, r1) \
+ UN1(N, OP, I, V, v2, r2) \
+ UN1(N, OP, I, V, r0, r1) \
+ UN1(N, OP, I, V, r0, r2) \
+ UN1(N, OP, I, V, r1, r2)
+
+/* reg0 = reg1 op reg2 */
+#define FOPR(N, T, OP, I0, I1, V, F0, F1, F2) \
+ movi##T %F1 I0 \
+ movi##T %F2 I1 \
+ OP##r##T %F0 %F1 %F2 \
+ beqi##T OP##T##N##F0##F1##F2 %F0 V \
+ calli @abort \
+OP##T##N##F0##F1##F2:
+
+/* reg0 = reg0 op reg1 */
+#define FOPR0(N, T, OP, I0, I1, V, F0, F1, F2) \
+ movi##T %F0 I0 \
+ movi##T %F1 I1 \
+ OP##r##T %F0 %F0 %F1 \
+ beqi##T OP##T##N##0##F0##F1##F2 %F0 V \
+ calli @abort \
+OP##T##N##0##F0##F1##F2:
+
+/* reg1 = reg0 op reg1 */
+#define FOPR1(N, T, OP, I0, I1, V, F0, F1, F2) \
+ movi##T %F0 I0 \
+ movi##T %F1 I1 \
+ OP##r##T %F1 %F0 %F1 \
+ beqi##T OP##T##N##1##F0##F1##F2 %F1 V \
+ calli @abort \
+OP##T##N##1##F0##F1##F2:
+
+/* reg0 = reg1 op im */
+#define FOPI(N, T, OP, I0, I1, V, F0, F1, F2) \
+ movi##T %F1 I0 \
+ movi##T %F2 V \
+ OP##i##T %F0 %F1 I1 \
+ beqr##T OP##T##N##i##F0##F1##F2 %F0 %F2 \
+ calli @abort \
+OP##T##N##i##F0##F1##F2:
+
+/* reg0 = reg0 op im */
+#define FOPI0(N, T, OP, I0, I1, V, F0, F1, F2) \
+ movi##T %F0 I0 \
+ movi##T %F2 V \
+ OP##i##T %F0 %F0 I1 \
+ beqr##T OP##T##N##i0##F0##F1##F2 %F0 %F2 \
+ calli @abort \
+OP##T##N##i0##F0##F1##F2:
+
+#define FOP1(N, T, OP, I0, I1, V, F0, F1, F2) \
+ FOPR(N, T, OP, I0, I1, V, F0, F1, F2) \
+ FOPR0(N, T, OP, I0, I1, V, F0, F1, F2) \
+ FOPR1(N, T, OP, I0, I1, V, F0, F1, F2) \
+ FOPI(N, T, OP, I0, I1, V, F0, F1, F2) \
+ FOPI0(N, T, OP, I0, I1, V, F0, F1, F2)
+
+#define FOP(N, T, OP, I0, I1, V) \
+ FOP1(N, T, OP, I0, I1, V, f0, f1, f2) \
+ FOP1(N, T, OP, I0, I1, V, f0, f2, f3) \
+ FOP1(N, T, OP, I0, I1, V, f0, f3, f4) \
+ FOP1(N, T, OP, I0, I1, V, f0, f5, f1)
+
+/* unary float */
+
+#define FUNR(N, T, OP, I, V, R0, R1) \
+ movi##T %R1 I \
+ OP##r##T %R0 %R1 \
+ beqi##T OP##N##T##R0##R1 %R0 V \
+ calli @abort \
+OP##N##T##R0##R1:
+
+#define FUNRC(N, T, OP, I, V, R0, R1) \
+ movi##T %R0 I \
+ OP##r##T %R0 %R0 \
+ beqi##T OP##N##T##c##R0##R1 %R0 V \
+ calli @abort \
+OP##N##T##c##R0##R1:
+
+#define FUN2(N, T, OP, I, V, R0, R1) \
+ FUNR(N, T, OP, I, V, R0, R1) \
+ FUNRC(N, T, OP, I, V, R0, R1)
+
+#define FUN1(N, T, OP, I, V, R0, R1) \
+ FUN2(N, T, OP, I, V, R0, R1) \
+ FUN2(N, T, OP, I, V, R1, R0)
+
+#define FUN(N, T, OP, I, V) \
+ FUN1(N, T, OP, I, V, f0, f1) \
+ FUN1(N, T, OP, I, V, f0, f2) \
+ FUN1(N, T, OP, I, V, f0, f3) \
+ FUN1(N, T, OP, I, V, f0, f4) \
+ FUN1(N, T, OP, I, V, f0, f5)
diff --git a/check/alu_add.ok b/check/alu_add.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_add.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_add.tst b/check/alu_add.tst
new file mode 100644
index 0000000..221b6ac
--- /dev/null
+++ b/check/alu_add.tst
@@ -0,0 +1,46 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define ADD(N, I0, I1, V) ALU(N, , add, I0, I1, V)
+
+ ADD(0, 0x7fffffff, 1, 0x80000000)
+ ADD(1, 1, 0x7fffffff, 0x80000000)
+ ADD(2, 0x80000000, 1, 0x80000001)
+ ADD(3, 1, 0x80000000, 0x80000001)
+ ADD(4, 0x7fffffff, 0x80000000, 0xffffffff)
+ ADD(5, 0x80000000, 0x7fffffff, 0xffffffff)
+ ADD(6, 0x7fffffff, 0, 0x7fffffff)
+ ADD(7, 0, 0x7fffffff, 0x7fffffff)
+#if __WORDSIZE == 32
+ ADD(8, 0x7fffffff, 0xffffffff, 0x7ffffffe)
+ ADD(9, 0xffffffff, 0x7fffffff, 0x7ffffffe)
+ ADD(10, 0xffffffff, 0xffffffff, 0xfffffffe)
+#else
+ ADD(8, 0x7fffffff, 0xffffffff, 0x17ffffffe)
+ ADD(9, 0xffffffff, 0x7fffffff, 0x17ffffffe)
+ ADD(10, 0xffffffff, 0xffffffff, 0x1fffffffe)
+ ADD(11, 0x7fffffffffffffff, 1, 0x8000000000000000)
+ ADD(12, 1, 0x7fffffffffffffff, 0x8000000000000000)
+ ADD(13, 0x8000000000000000, 1, 0x8000000000000001)
+ ADD(14, 1, 0x8000000000000000, 0x8000000000000001)
+ ADD(15, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff)
+ ADD(16, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+ ADD(17, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7ffffffffffffffe)
+ ADD(18, 0x7fffffffffffffff, 0x7fffffffffffffff, 0xfffffffffffffffe)
+ ADD(19, 0xffffffffffffffff, 0xffffffffffffffff, 0xfffffffffffffffe)
+#endif
+
+#undef ADD
+#define ADD(N, T, I0, I1, V) FOP(N, T, add, I0, I1, V)
+ ADD(0, _f, -0.5, 0.5, 0.0)
+ ADD(1, _f, 0.25, 0.75, 1.0)
+ ADD(0, _d, -0.5, 0.5, 0.0)
+ ADD(1, _d, 0.25, 0.75, 1.0)
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_and.ok b/check/alu_and.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_and.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_and.tst b/check/alu_and.tst
new file mode 100644
index 0000000..2f0da9a
--- /dev/null
+++ b/check/alu_and.tst
@@ -0,0 +1,35 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define AND(N, I0, I1, V) ALU(N, , and, I0, I1, V)
+
+ AND(0, 0x7fffffff, 1, 1)
+ AND(1, 1, 0x7fffffff, 1)
+ AND(2, 0x80000000, 1, 0)
+ AND(3, 1, 0x80000000, 0)
+ AND(4, 0x7fffffff, 0x80000000, 0)
+ AND(5, 0x80000000, 0x7fffffff, 0)
+ AND(6, 0x7fffffff, 0xffffffff, 0x7fffffff)
+ AND(7, 0xffffffff, 0x7fffffff, 0x7fffffff)
+ AND(8, 0xffffffff, 0xffffffff, 0xffffffff)
+ AND(9, 0x7fffffff, 0, 0)
+ AND(10, 0, 0x7fffffff, 0)
+#if __WORDSIZE == 64
+ AND(11, 0x7fffffffffffffff, 1, 1)
+ AND(12, 1, 0x7fffffffffffffff, 1)
+ AND(13, 0x8000000000000000, 1, 0)
+ AND(14, 1, 0x8000000000000000, 0)
+ AND(15, 0x7fffffffffffffff, 0x8000000000000000, 0)
+ AND(16, 0x8000000000000000, 0x7fffffffffffffff, 0)
+ AND(17, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffffffffffff)
+ AND(18, 0xffffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff)
+ AND(19, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_com.ok b/check/alu_com.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_com.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_com.tst b/check/alu_com.tst
new file mode 100644
index 0000000..4722813
--- /dev/null
+++ b/check/alu_com.tst
@@ -0,0 +1,32 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define COM(N, I0, V) UN(N, com, I0, V)
+
+#if __WORDSIZE == 32
+ COM(0, 0, 0xffffffff)
+ COM(1, 1, 0xfffffffe)
+ COM(2, 0xffffffff, 0)
+ COM(3, 0x80000000, 0x7fffffff)
+ COM(4, 0x7fffffff, 0x80000000)
+ COM(5, 0x80000001, 0x7ffffffe)
+#else
+ COM(0, 0, 0xffffffffffffffff)
+ COM(1, 1, 0xfffffffffffffffe)
+ COM(2, 0xffffffff, 0xffffffff00000000)
+ COM(3, 0x80000000, 0xffffffff7fffffff)
+ COM(4, 0x7fffffff, 0xffffffff80000000)
+ COM(5, 0x80000001, 0xffffffff7ffffffe)
+ COM(6, 0xffffffffffffffff, 0)
+ COM(7, 0x8000000000000000, 0x7fffffffffffffff)
+ COM(8, 0x7fffffffffffffff, 0x8000000000000000)
+ COM(9, 0x8000000000000001, 0x7ffffffffffffffe)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_div.ok b/check/alu_div.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_div.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_div.tst b/check/alu_div.tst
new file mode 100644
index 0000000..37c1a2b
--- /dev/null
+++ b/check/alu_div.tst
@@ -0,0 +1,82 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define DIV(N, I0, I1, V) ALU(N, , div, I0, I1, V)
+#define UDIV(N, I0, I1, V) ALU(N, _u, div, I0, I1, V)
+
+ DIV(0, 0x7fffffff, 1, 0x7fffffff)
+ DIV(1, 1, 0x7fffffff, 0)
+ DIV(2, 0x80000000, 1, 0x80000000)
+ DIV(3, 1, 0x80000000, 0)
+ DIV(4, 0x7fffffff, 2, 0x3fffffff)
+ DIV(5, 2, 0x7fffffff, 0)
+ DIV(6, 2, 0x80000000, 0)
+ DIV(7, 0x7fffffff, 0x80000000, 0)
+ DIV(8, 0, 0x7fffffff, 0)
+ DIV(9, 0xffffffff, 0xffffffff, 1)
+ UDIV(0, 0x7fffffff, 1, 0x7fffffff)
+ UDIV(1, 1, 0x7fffffff, 0)
+ UDIV(2, 0x80000000, 1, 0x80000000)
+ UDIV(3, 1, 0x80000000, 0)
+ UDIV(4, 0x7fffffff, 2, 0x3fffffff)
+ UDIV(5, 2, 0x7fffffff, 0)
+ UDIV(6, 0x80000000, 2, 0x40000000)
+ UDIV(7, 2, 0x80000000, 0)
+ UDIV(8, 0x7fffffff, 0x80000000, 0)
+ UDIV(9, 0x80000000, 0x7fffffff, 1)
+ UDIV(10,0, 0x7fffffff, 0)
+ UDIV(11,0x7fffffff, 0xffffffff, 0)
+ UDIV(12,0xffffffff, 0x7fffffff, 2)
+ UDIV(13,0xffffffff, 0xffffffff, 1)
+#if __WORDSIZE == 32
+ DIV(10, 0x80000000, 2, 0xc0000000)
+ DIV(11, 0x80000000, 0x7fffffff, 0xffffffff)
+ DIV(12, 0x7fffffff, 0xffffffff, 0x80000001)
+ DIV(13, 0xffffffff, 0x7fffffff, 0)
+#else
+ DIV(10, 0x80000000, 2, 0x40000000)
+ DIV(11, 0x80000000, 0x7fffffff, 1)
+ DIV(12, 0x7fffffff, 0xffffffff, 0)
+ DIV(13, 0xffffffff, 0x7fffffff, 2)
+ DIV(14, 0x7fffffffffffffff, 1, 0x7fffffffffffffff)
+ DIV(15, 1, 0x7fffffffffffffff, 0)
+ DIV(16, 0x8000000000000000, 1, 0x8000000000000000)
+ DIV(17, 1, 0x8000000000000000, 0)
+ DIV(18, 0x7fffffffffffffff, 2, 0x3fffffffffffffff)
+ DIV(19, 2, 0x7fffffffffffffff, 0)
+ DIV(20, 0x8000000000000000, 2, 0xc000000000000000)
+ DIV(21, 2, 0x8000000000000000, 0)
+ DIV(22, 0x7fffffffffffffff, 0x8000000000000000, 0)
+ DIV(23, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+ DIV(24, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000001)
+ DIV(25, 0xffffffffffffffff, 0x7fffffffffffffff, 0)
+ DIV(26, 0xffffffffffffffff, 0xffffffffffffffff, 1)
+ UDIV(14,0x7fffffffffffffff, 1, 0x7fffffffffffffff)
+ UDIV(15,1, 0x7fffffffffffffff, 0)
+ UDIV(16,0x8000000000000000, 1, 0x8000000000000000)
+ UDIV(17,1, 0x8000000000000000, 0)
+ UDIV(18,0x7fffffffffffffff, 2, 0x3fffffffffffffff)
+ UDIV(19,2, 0x7fffffffffffffff, 0)
+ UDIV(20,0x8000000000000000, 2, 0x4000000000000000)
+ UDIV(21,2, 0x8000000000000000, 0)
+ UDIV(22,0x7fffffffffffffff, 0x8000000000000000, 0)
+ UDIV(23,0x8000000000000000, 0x7fffffffffffffff, 1)
+ UDIV(24,0x7fffffffffffffff, 0xffffffffffffffff, 0)
+ UDIV(25,0xffffffffffffffff, 0x7fffffffffffffff, 2)
+ UDIV(26,0xffffffffffffffff, 0xffffffffffffffff, 1)
+#endif
+
+#undef DIV
+#define DIV(N, T, I0, I1, V) FOP(N, T, div, I0, I1, V)
+ DIV(0, _f, -0.5, 0.5, -1.0)
+ DIV(1, _f, 1.25, 0.5, 2.5)
+ DIV(0, _d, -0.5, 0.5, -1.0)
+ DIV(1, _d, 1.25, 0.5, 2.5)
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_lsh.ok b/check/alu_lsh.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_lsh.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_lsh.tst b/check/alu_lsh.tst
new file mode 100644
index 0000000..1b3118f
--- /dev/null
+++ b/check/alu_lsh.tst
@@ -0,0 +1,56 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define LSH(N, I0, I1, V) ALU(N, , lsh, I0, I1, V)
+
+ LSH(0, 0x7f, 1, 0xfe)
+ LSH(1, 0x7fff, 2, 0x1fffc)
+ LSH(2, 0x81, 16, 0x810000)
+ LSH(3, 0xff, 15, 0x7f8000)
+ LSH(4, 0x7fffffff, 0, 0x7fffffff)
+#if __WORDSIZE == 32
+ LSH(5, 0xffffffff, 8, 0xffffff00)
+ LSH(6, 0x7fffffff, 3, 0xfffffff8)
+ LSH(7, -0x7f, 31, 0x80000000)
+ LSH(8, -0x7fff, 30, 0x40000000)
+ LSH(9, -0x7fffffff, 29, 0x20000000)
+ LSH(10, 0x80000001, 28, 0x10000000)
+ LSH(11, 0x8001, 17, 0x20000)
+ LSH(12, 0x80000001, 18, 0x40000)
+ LSH(13, -0xffff, 24, 0x1000000)
+#else
+ LSH(5, 0xffffffff, 8, 0xffffffff00)
+ LSH(6, 0x7fffffff, 3, 0x3fffffff8)
+ LSH(7, -0x7f, 31, 0xffffffc080000000)
+ LSH(8, -0x7fff, 30, 0xffffe00040000000)
+ LSH(9, -0x7fffffff, 29, 0xf000000020000000)
+ LSH(10, 0x80000001, 28, 0x800000010000000)
+ LSH(11, 0x8001, 17, 0x100020000)
+ LSH(12, 0x80000001, 18, 0x2000000040000)
+ LSH(13, -0xffff, 24, 0xffffff0001000000)
+ LSH(14, 0x7f, 33, 0xfe00000000)
+ LSH(15, 0x7ffff, 34, 0x1ffffc00000000)
+ LSH(16, 0x7fffffff, 35, 0xfffffff800000000)
+ LSH(17, -0x7f, 63, 0x8000000000000000)
+ LSH(18, -0x7fff, 62, 0x4000000000000000)
+ LSH(19, -0x7fffffff, 61, 0x2000000000000000)
+ LSH(20, 0x80000001, 60, 0x1000000000000000)
+ LSH(21, 0x81, 48, 0x81000000000000)
+ LSH(22, 0x8001, 49, 0x2000000000000)
+ LSH(23, 0x80000001, 40, 0x10000000000)
+ LSH(24, 0xff, 47, 0x7f800000000000)
+ LSH(25, 0xffff0001, 56, 0x100000000000000)
+ LSH(26, 0xffffffff, 40, 0xffffff0000000000)
+ LSH(27, 0x7fffffffff, 33, 0xfffffffe00000000)
+ LSH(28, -0x7fffffffff, 63, 0x8000000000000000)
+ LSH(29, 0x8000000001, 48, 0x1000000000000)
+ LSH(30, 0xffffffffff, 47, 0xffff800000000000)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_mul.ok b/check/alu_mul.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_mul.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_mul.tst b/check/alu_mul.tst
new file mode 100644
index 0000000..edf9777
--- /dev/null
+++ b/check/alu_mul.tst
@@ -0,0 +1,58 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define MUL(N, I0, I1, V) ALU(N, , mul, I0, I1, V)
+
+ MUL(0, 0x7fffffff, 1, 0x7fffffff)
+ MUL(1, 1, 0x7fffffff, 0x7fffffff)
+ MUL(2, 0x80000000, 1, 0x80000000)
+ MUL(3, 1, 0x80000000, 0x80000000)
+ MUL(4, 0x7fffffff, 2, 0xfffffffe)
+ MUL(5, 2, 0x7fffffff, 0xfffffffe)
+ MUL(6, 0x7fffffff, 0, 0)
+ MUL(7, 0, 0x7fffffff, 0)
+#if __WORDSIZE == 32
+ MUL(8, 0x80000000, 2, 0)
+ MUL(9, 2, 0x80000000, 0)
+ MUL(10, 0x7fffffff, 0x80000000, 0x80000000)
+ MUL(11, 0x80000000, 0x7fffffff, 0x80000000)
+ MUL(12, 0x7fffffff, 0xffffffff, 0x80000001)
+ MUL(13, 0xffffffff, 0x7fffffff, 0x80000001)
+ MUL(14, 0xffffffff, 0xffffffff, 1)
+#else
+ MUL(8, 0x80000000, 2, 0x100000000)
+ MUL(9, 2, 0x80000000, 0x100000000)
+ MUL(10, 0x7fffffff, 0x80000000, 0x3fffffff80000000)
+ MUL(11, 0x80000000, 0x7fffffff, 0x3fffffff80000000)
+ MUL(12, 0x7fffffff, 0xffffffff, 0x7ffffffe80000001)
+ MUL(13, 0xffffffff, 0x7fffffff, 0x7ffffffe80000001)
+ MUL(14, 0xffffffff, 0xffffffff, 0xfffffffe00000001)
+ MUL(15, 0x7fffffffffffffff, 1, 0x7fffffffffffffff)
+ MUL(16, 1, 0x7fffffffffffffff, 0x7fffffffffffffff)
+ MUL(17, 0x8000000000000000, 1, 0x8000000000000000)
+ MUL(18, 1, 0x8000000000000000, 0x8000000000000000)
+ MUL(19, 0x7fffffffffffffff, 2, 0xfffffffffffffffe)
+ MUL(20, 2, 0x7fffffffffffffff, 0xfffffffffffffffe)
+ MUL(21, 0x8000000000000000, 2, 0)
+ MUL(22, 2, 0x8000000000000000, 0)
+ MUL(23, 0x7fffffffffffffff, 0x8000000000000000, 0x8000000000000000)
+ MUL(24, 0x8000000000000000, 0x7fffffffffffffff, 0x8000000000000000)
+ MUL(25, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000001)
+ MUL(26, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000001)
+ MUL(27, 0xffffffffffffffff, 0xffffffffffffffff, 1)
+#endif
+
+#undef MUL
+#define MUL(N, T, I0, I1, V) FOP(N, T, mul, I0, I1, V)
+ MUL(0, _f, -0.5, 0.5, -0.25)
+ MUL(1, _f, 0.25, 0.75, 0.1875)
+ MUL(0, _d, -0.5, 0.5, -0.25)
+ MUL(1, _d, 0.25, 0.75, 0.1875)
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_neg.ok b/check/alu_neg.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_neg.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_neg.tst b/check/alu_neg.tst
new file mode 100644
index 0000000..73b1b30
--- /dev/null
+++ b/check/alu_neg.tst
@@ -0,0 +1,41 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define NEG(N, I, V) UN(N, neg, I, V)
+
+ NEG(0, 0, 0)
+#if __WORDSIZE == 32
+ NEG(1, 1, 0xffffffff)
+ NEG(2, 0xffffffff, 1)
+ NEG(3, 0x80000000, 0x80000000)
+ NEG(4, 0x7fffffff, 0x80000001)
+ NEG(5, 0x80000001, 0x7fffffff)
+#else
+ NEG(1, 1, 0xffffffffffffffff)
+ NEG(2, 0xffffffff, 0xffffffff00000001)
+ NEG(3, 0x80000000, 0xffffffff80000000)
+ NEG(4, 0x7fffffff, 0xffffffff80000001)
+ NEG(5, 0x80000001, 0xffffffff7fffffff)
+ NEG(6, 0xffffffffffffffff, 1)
+ NEG(7, 0x8000000000000000, 0x8000000000000000)
+ NEG(8, 0x7fffffffffffffff, 0x8000000000000001)
+#endif
+
+#undef NEG
+#define NEG(N, T, I, V) FUN(N, T, neg, I, V)
+ NEG(0, _f, 0.0, -0.0)
+ NEG(1, _f, 0.5, -0.5)
+ NEG(2, _f, $(1 / 0.0), $(-1.0 / 0))
+ NEG(3, _f, -1.25, 1.25)
+ NEG(0, _d, 0.0, -0.0)
+ NEG(1, _d, 0.5, -0.5)
+ NEG(2, _d, $(1.0 / 0), $(-1 / 0.0))
+ NEG(3, _d, -1.25, 1.25)
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_or.ok b/check/alu_or.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_or.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_or.tst b/check/alu_or.tst
new file mode 100644
index 0000000..31aa0c6
--- /dev/null
+++ b/check/alu_or.tst
@@ -0,0 +1,35 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define OR(N, I0, I1, V) ALU(N, , or, I0, I1, V)
+
+ OR(0, 0x7fffffff, 1, 0x7fffffff)
+ OR(1, 1, 0x7fffffff, 0x7fffffff)
+ OR(2, 0x80000000, 1, 0x80000001)
+ OR(3, 1, 0x80000000, 0x80000001)
+ OR(4, 0x7fffffff, 0x80000000, 0xffffffff)
+ OR(5, 0x80000000, 0x7fffffff, 0xffffffff)
+ OR(6, 0x7fffffff, 0xffffffff, 0xffffffff)
+ OR(7, 0xffffffff, 0x7fffffff, 0xffffffff)
+ OR(8, 0xffffffff, 0xffffffff, 0xffffffff)
+ OR(9, 0x7fffffff, 0, 0x7fffffff)
+ OR(10, 0, 0x7fffffff, 0x7fffffff)
+#if __WORDSIZE == 64
+ OR(11, 0x7fffffffffffffff, 1, 0x7fffffffffffffff)
+ OR(12, 1, 0x7fffffffffffffff, 0x7fffffffffffffff)
+ OR(13, 0x8000000000000000, 1, 0x8000000000000001)
+ OR(14, 1, 0x8000000000000000, 0x8000000000000001)
+ OR(15, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff)
+ OR(16, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+ OR(17, 0x7fffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff)
+ OR(18, 0xffffffffffffffff, 0x7fffffffffffffff, 0xffffffffffffffff)
+ OR(19, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_rem.ok b/check/alu_rem.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_rem.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_rem.tst b/check/alu_rem.tst
new file mode 100644
index 0000000..f6c6074
--- /dev/null
+++ b/check/alu_rem.tst
@@ -0,0 +1,75 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define REM(N, I0, I1, V) ALU(N, , rem, I0, I1, V)
+#define UREM(N, I0, I1, V) ALU(N, _u, rem, I0, I1, V)
+
+ REM(0, 0x7fffffff, 1, 0)
+ REM(1, 1, 0x7fffffff, 1)
+ REM(2, 0x80000000, 1, 0)
+ REM(3, 1, 0x80000000, 1)
+ REM(4, 0x7fffffff, 2, 1)
+ REM(5, 2, 0x7fffffff, 2)
+ REM(6, 0x80000000, 2, 0)
+ REM(7, 2, 0x80000000, 2)
+ REM(8, 0x7fffffff, 0x80000000, 0x7fffffff)
+ REM(9, 0, 0x7fffffff, 0)
+ REM(10, 0xffffffff, 0xffffffff, 0)
+ UREM(0, 0x7fffffff, 1, 0)
+ UREM(1, 1, 0x7fffffff, 1)
+ UREM(2, 0x80000000, 1, 0)
+ UREM(3, 1, 0x80000000, 1)
+ UREM(4, 0x7fffffff, 2, 1)
+ UREM(5, 2, 0x7fffffff, 2)
+ UREM(6, 0x80000000, 2, 0)
+ UREM(7, 2, 0x80000000, 2)
+ UREM(8, 0x7fffffff, 0x80000000, 0x7fffffff)
+ UREM(9, 0x80000000, 0x7fffffff, 1)
+ UREM(10,0, 0x7fffffff, 0)
+ UREM(11,0x7fffffff, 0xffffffff, 0x7fffffff)
+ UREM(12,0xffffffff, 0x7fffffff, 1)
+ UREM(13,0xffffffff, 0xffffffff, 0)
+
+#if __WORDSIZE == 32
+ REM(11, 0x80000000, 0x7fffffff, 0xffffffff)
+ REM(12, 0x7fffffff, 0xffffffff, 0)
+ REM(13, 0xffffffff, 0x7fffffff, 0xffffffff)
+#else
+ REM(11, 0x80000000, 0x7fffffff, 1)
+ REM(12, 0x7fffffff, 0xffffffff, 0x7fffffff)
+ REM(13, 0xffffffff, 0x7fffffff, 1)
+ REM(14, 0x7fffffffffffffff, 1, 0)
+ REM(15, 1, 0x7fffffffffffffff, 1)
+ REM(16, 0x8000000000000000, 1, 0)
+ REM(17, 1, 0x8000000000000000, 1)
+ REM(18, 0x7fffffffffffffff, 2, 1)
+ REM(19, 2, 0x7fffffffffffffff, 2)
+ REM(20, 0x8000000000000000, 2, 0)
+ REM(21, 2, 0x8000000000000000, 2)
+ REM(22, 0x7fffffffffffffff, 0x8000000000000000, 0x7fffffffffffffff)
+ REM(23, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+ REM(24, 0x7fffffffffffffff, 0xffffffffffffffff, 0)
+ REM(25, 0xffffffffffffffff, 0x7fffffffffffffff, 0xffffffffffffffff)
+ REM(26, 0xffffffffffffffff, 0xffffffffffffffff, 0)
+ UREM(14,0x7fffffffffffffff, 1, 0)
+ UREM(15,1, 0x7fffffffffffffff, 1)
+ UREM(16,0x8000000000000000, 1, 0)
+ UREM(17,1, 0x8000000000000000, 1)
+ UREM(18,0x7fffffffffffffff, 2, 1)
+ UREM(19,2, 0x7fffffffffffffff, 2)
+ UREM(20,0x8000000000000000, 2, 0)
+ UREM(21,2, 0x8000000000000000, 2)
+ UREM(22,0x7fffffffffffffff, 0x8000000000000000, 0x7fffffffffffffff)
+ UREM(23,0x8000000000000000, 0x7fffffffffffffff, 1)
+ UREM(24,0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffffffffffff)
+ UREM(25,0xffffffffffffffff, 0x7fffffffffffffff, 1)
+ UREM(26,0xffffffffffffffff, 0xffffffffffffffff, 0)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_rsh.ok b/check/alu_rsh.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_rsh.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_rsh.tst b/check/alu_rsh.tst
new file mode 100644
index 0000000..f32c3c9
--- /dev/null
+++ b/check/alu_rsh.tst
@@ -0,0 +1,84 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define RSH(N, I0, I1, V) ALU(N, , rsh, I0, I1, V)
+#define URSH(N, I0, I1, V) ALU(N, _u, rsh, I0, I1, V)
+
+ RSH(0, 0xfe, 1, 0x7f)
+ RSH(1, 0x1fffc, 2, 0x7fff)
+ RSH(2, 0x40000000, 30, 1)
+ RSH(3, 0x20000000, 29, 1)
+ RSH(4, 0x10000000, 28, 1)
+ RSH(5, 0x810000, 16, 0x81)
+ RSH(6, 0x20000, 17, 1)
+ RSH(7, 0x40000, 18, 1)
+ RSH(8, 0x7f8000, 15, 0xff)
+ RSH(9, 0x1000000, 24, 1)
+ RSH(10, 0x7fffffff, 0, 0x7fffffff)
+ URSH(0, 0xfe, 1, 0x7f)
+ URSH(1, 0x1fffc, 2, 0x7fff)
+ URSH(2, 0x80000000, 31, 1)
+ URSH(3, 0x40000000, 30, 1)
+ URSH(4, 0x20000000, 29, 1)
+ URSH(5, 0x10000000, 28, 1)
+ URSH(6, 0x810000, 16, 0x81)
+ URSH(7, 0x20000, 17, 1)
+ URSH(8, 0x40000, 18, 1)
+ URSH(9,0x7f8000, 15, 0xff)
+ URSH(10,0x1000000, 24, 1)
+ URSH(11,0xffffff00, 8, 0xffffff)
+ URSH(12,0x7fffffff, 0, 0x7fffffff)
+#if __WORDSIZE == 32
+ RSH(11, 0xfffffff8, 3, 0xffffffff)
+ RSH(12, 0x80000000, 31, 0xffffffff)
+ RSH(13, 0xffffff00, 8, 0xffffffff)
+ URSH(13,0xfffffff8, 3, 0x1fffffff)
+#else
+ RSH(11, 0x3fffffff8, 3, 0x7fffffff)
+ RSH(12, 0xffffffc080000000, 31, 0xffffffffffffff81)
+ RSH(13, 0xffffff00, 8, 0xffffff)
+ RSH(14, 0xfe00000000, 33, 0x7f)
+ RSH(15, 0x1ffffc00000000, 34, 0x7ffff)
+ RSH(16, 0xfffffff800000000, 29, 0xffffffffffffffc0)
+ RSH(17, 0x8000000000000000, 63, 0xffffffffffffffff)
+ RSH(18, 0x4000000000000000, 62, 1)
+ RSH(19, 0x2000000000000000, 61, 1)
+ RSH(20, 0x1000000000000000, 60, 1)
+ RSH(21, 0x81000000000000, 48, 0x81)
+ RSH(22, 0x2000000000000, 49, 1)
+ RSH(23, 0x10000000000, 40, 1)
+ RSH(24, 0x7f800000000000, 47, 0xff)
+ RSH(25, 0x100000000000000, 56, 1)
+ RSH(26, 0xffffff0000000000, 40, 0xffffffffffffffff)
+ RSH(27, 0xfffffffe00000000, 33, 0xffffffffffffffff)
+ RSH(28, 0x8000000000000001, 63, 0xffffffffffffffff)
+ RSH(29, 0x1000000000000, 48, 1)
+ RSH(30, 0xffff800000000000, 47, 0xffffffffffffffff)
+ URSH(13,0x3fffffff8, 3, 0x7fffffff)
+ URSH(14,0xffffffc080000000, 31, 0x1ffffff81)
+ URSH(15,0xfe00000000, 33, 0x7f)
+ URSH(16,0x1ffffc00000000, 34, 0x7ffff)
+ URSH(17,0xfffffff800000000, 29, 0x7ffffffc0)
+ URSH(18,0x8000000000000000, 63, 1)
+ URSH(19,0x4000000000000000, 62, 1)
+ URSH(20,0x2000000000000000, 61, 1)
+ URSH(21,0x1000000000000000, 60, 1)
+ URSH(22,0x81000000000000, 48, 0x81)
+ URSH(23,0x2000000000000, 49, 1)
+ URSH(24,0x10000000000, 40, 1)
+ URSH(25,0x7f800000000000, 47, 0xff)
+ URSH(26,0x100000000000000, 56, 1)
+ URSH(27,0xffffff0000000000, 40, 0xffffff)
+ URSH(28,0xfffffffe00000000, 33, 0x7fffffff)
+ URSH(29,0x8000000000000001, 63, 1)
+ URSH(30,0x1000000000000, 48, 1)
+ URSH(31,0xffff800000000000, 47, 0x1ffff)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_sub.ok b/check/alu_sub.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_sub.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_sub.tst b/check/alu_sub.tst
new file mode 100644
index 0000000..4e8fd3c
--- /dev/null
+++ b/check/alu_sub.tst
@@ -0,0 +1,48 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define SUB(N, I0, I1, V) ALU(N, , sub, I0, I1, V)
+
+ SUB(0, 0x7fffffff, 1, 0x7ffffffe)
+ SUB(2, 0x80000000, 1, 0x7fffffff)
+ SUB(3, 0x80000000, 0x7fffffff, 1)
+ SUB(4, 0xffffffff, 0xffffffff, 0)
+ SUB(5, 0xffffffff, 0x7fffffff, 0x80000000)
+ SUB(6, 0x7fffffff, 0, 0x7fffffff)
+#if __WORDSIZE == 32
+ SUB(7, 1, 0x7fffffff, 0x80000002)
+ SUB(8, 1, 0x80000000, 0x80000001)
+ SUB(9, 0x7fffffff, 0x80000000, 0xffffffff)
+ SUB(10, 0x7fffffff, 0xffffffff, 0x80000000)
+ SUB(11, 0, 0x7fffffff, 0x80000001)
+#else
+ SUB(7, 1, 0x7fffffff, 0xffffffff80000002)
+ SUB(8, 1, 0xffffffff80000000, 0x80000001)
+ SUB(9, 0x7fffffff, 0xffffffff80000000, 0xffffffff)
+ SUB(10, 0xffffffff7fffffff, 0xffffffffffffffff, 0xffffffff80000000)
+ SUB(11, 0, 0x7fffffff, 0xffffffff80000001)
+ SUB(12, 0x7fffffffffffffff, 1, 0x7ffffffffffffffe)
+ SUB(13, 1, 0x7fffffffffffffff, 0x8000000000000002)
+ SUB(14, 0x8000000000000000, 1, 0x7fffffffffffffff)
+ SUB(15, 1, 0x8000000000000000, 0x8000000000000001)
+ SUB(16, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff)
+ SUB(17, 0x8000000000000000, 0x7fffffffffffffff, 1)
+ SUB(18, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000000)
+ SUB(19, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000000)
+ SUB(20, 0xffffffffffffffff, 0xffffffffffffffff, 0)
+#endif
+
+#undef SUB
+#define SUB(N, T, I0, I1, V) FOP(N, T, sub, I0, I1, V)
+ SUB(0, _f, -0.5, 0.5, -1.0)
+ SUB(1, _f, 0.25, 0.75, -0.5)
+ SUB(0, _d, -0.5, 0.5, -1.0)
+ SUB(1, _d, 0.25, 0.75, -0.5)
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alu_xor.ok b/check/alu_xor.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alu_xor.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alu_xor.tst b/check/alu_xor.tst
new file mode 100644
index 0000000..5c98e27
--- /dev/null
+++ b/check/alu_xor.tst
@@ -0,0 +1,35 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define XOR(N, I0, I1, V) ALU(N, , xor, I0, I1, V)
+
+ XOR(0, 0x7fffffff, 1, 0x7ffffffe)
+ XOR(1, 1, 0x7fffffff, 0x7ffffffe)
+ XOR(2, 0x80000000, 1, 0x80000001)
+ XOR(3, 1, 0x80000000, 0x80000001)
+ XOR(4, 0x7fffffff, 0x80000000, 0xffffffff)
+ XOR(5, 0x80000000, 0x7fffffff, 0xffffffff)
+ XOR(6, 0x7fffffff, 0xffffffff, 0x80000000)
+ XOR(7, 0xffffffff, 0x7fffffff, 0x80000000)
+ XOR(9, 0xffffffff, 0xffffffff, 0)
+ XOR(10, 0x7fffffff, 0, 0x7fffffff)
+ XOR(11, 0, 0x7fffffff, 0x7fffffff)
+#if __WORDSIZE == 64
+ XOR(12, 0x7fffffffffffffff, 1, 0x7ffffffffffffffe)
+ XOR(13, 1, 0x7fffffffffffffff, 0x7ffffffffffffffe)
+ XOR(14, 0x8000000000000000, 1, 0x8000000000000001)
+ XOR(15, 1, 0x8000000000000000, 0x8000000000000001)
+ XOR(16, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff)
+ XOR(17, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+ XOR(18, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000000)
+ XOR(19, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000000)
+ XOR(20, 0xffffffffffffffff, 0xffffffffffffffff, 0)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alux_add.ok b/check/alux_add.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alux_add.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alux_add.tst b/check/alux_add.tst
new file mode 100644
index 0000000..68cf8e5
--- /dev/null
+++ b/check/alux_add.tst
@@ -0,0 +1,48 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define ADDX(N, I0, I1, V) ALUX(N, add, I0, I1, V)
+
+ /* nothing */
+ ADDX(0, 0, 0, 0)
+#if __WORDSIZE == 32
+ /* carry */
+ ADDX(1, 0xffffffff, 0xffffffff, 1)
+ /* overflow */
+ ADDX(2, 0x7fffffff, 1, 0)
+ /* overflow */
+ ADDX(3, 0x7fffffff, 0x7fffffff, 0)
+ /* nothing */
+ ADDX(4, 0x7fffffff, 0x80000000, 0)
+ /* carry+overflow */
+ ADDX(5, 0x80000000, 0x80000000, 1)
+#else
+ /* nothing */
+ ADDX(1, 0xffffffff, 0xffffffff, 0)
+ /* nothing */
+ ADDX(2, 0x7fffffff, 1, 0)
+ /* nothing */
+ ADDX(3, 0x7fffffff, 0x7fffffff, 0)
+ /* nothing */
+ ADDX(4, 0x7fffffff, 0x80000000, 0)
+ /* nothing */
+ ADDX(5, 0x80000000, 0x80000000, 0)
+ /* carry */
+ ADDX(6, 0xffffffffffffffff, 0xffffffffffffffff, 1)
+ /* overflow */
+ ADDX(7, 0x7fffffffffffffff, 1, 0)
+ /* overflow */
+ ADDX(8, 0x7fffffffffffffff, 0x7fffffffffffffff, 0)
+ /* nothing */
+ ADDX(9, 0x7fffffffffffffff, 0x8000000000000000, 0)
+ /* carry+overflow */
+ ADDX(10,0x8000000000000000, 0x8000000000000000, 1)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/alux_sub.ok b/check/alux_sub.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/alux_sub.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/alux_sub.tst b/check/alux_sub.tst
new file mode 100644
index 0000000..edef15f
--- /dev/null
+++ b/check/alux_sub.tst
@@ -0,0 +1,48 @@
+#include "alu.inc"
+
+.code
+ prolog
+
+#define SUBX(N, I0, I1, V) ALUX(N, sub, I0, I1, V)
+
+ /* nothing */
+ SUBX(0, 0, 0, 0)
+#if __WORDSIZE == 32
+ /* carry */
+ SUBX(1, 0x7fffffff, 0xffffffff, 0xffffffff)
+ /* overflow */
+ SUBX(2, 0x80000000, 1, 0)
+ /* carry */
+ SUBX(3, 0x7fffffff, 0x80000000, 0xffffffff)
+ /* overflow */
+ SUBX(4, 0x80000000, 0x7fffffff, 0)
+ /* carry+overflow */
+ SUBX(5, 1, 0x80000000, 0xffffffff)
+#else
+ /* carry */
+ SUBX(1, 0x7fffffff, 0xffffffff, -1)
+ /* nothing */
+ SUBX(2, 0x80000000, 1, 0)
+ /* carry */
+ SUBX(3, 0x7fffffff, 0x80000000, -1)
+ /* nothing */
+ SUBX(4, 0x80000000, 0x7fffffff, 0)
+ /* carry */
+ SUBX(5, 1, 0x80000000, -1)
+ /* carry */
+ SUBX(6, 0x7fffffffffffffff, 0xffffffffffffffff, -1)
+ /* overflow */
+ SUBX(7, 0x8000000000000000, 1, 0)
+ /* carry */
+ SUBX(8, 0x7fffffffffffffff, 0x8000000000000000, -1)
+ /* overflow */
+ SUBX(9, 0x8000000000000000, 0x7fffffffffffffff, 0)
+ /* carry+overflow */
+ SUBX(10,1, 0x8000000000000000, -1)
+#endif
+
+ prepare 1
+ pushargi ok
+ finishi @printf
+ ret
+ epilog
diff --git a/check/branch.ok b/check/branch.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/branch.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/branch.tst b/check/branch.tst
new file mode 100644
index 0000000..5fd54d5
--- /dev/null
+++ b/check/branch.tst
@@ -0,0 +1,562 @@
+#if __WORDSIZE == 64
+# define I7f 0x7fffffffffffffff
+# define I80 0x8000000000000000
+# define I81 0x8000000000000001
+# define Iff 0xffffffffffffffff
+#else
+# define I7f 0x7fffffff
+# define I80 0x80000000
+# define I81 0x80000001
+# define Iff 0xffffffff
+#endif
+
+.data 12
+ok:
+.c "ok\n"
+. $($NaN = 0.0 / 0.0)
+
+#define BOP(N, Ls, Rs, Lu, Ru, R0, R1) \
+ movi %R0 Ls \
+ movi %R1 Rs \
+ b##N##r N##r_##R0##_##R1 %R0 %R1 \
+ calli @abort \
+N##r_##R0##_##R1: \
+ b##N##i N##i_##R0##_##R1 %R0 Rs \
+ calli @abort \
+N##i_##R0##_##R1: \
+ movi %R0 Lu \
+ movi %R1 Ru \
+ b##N##r_u N##r_u_##R0##_##R1 %R0 %R1 \
+ calli @abort \
+N##r_u_##R0##_##R1: \
+ b##N##i_u N##i_u_##R0##_##R1 %R0 Ru \
+ calli @abort \
+N##i_u_##R0##_##R1: \
+ movi %R0 Ls \
+ movi %R1 Rs \
+ N##r %R0 %R0 %R1 \
+ beqi _##N##r_##R0##_##R1 %R0 1 \
+ calli @abort \
+_##N##r_##R0##_##R1: \
+ movi %R0 Ls \
+ N##i %R1 %R0 Rs \
+ beqi _##N##i_##R0##_##R1 %R1 1 \
+ calli @abort \
+_##N##i_##R0##_##R1: \
+ movi %R0 Lu \
+ movi %R1 Ru \
+ N##r_u %R0 %R0 %R1 \
+ beqi _##N##r_u_##R0##_##R1 %R0 1 \
+ calli @abort \
+_##N##r_u_##R0##_##R1: \
+ movi %R0 Lu \
+ N##i_u %R1 %R0 Ru \
+ beqi _##N##i_u_##R0##_##R1 %R1 1 \
+ calli @abort \
+_##N##i_u_##R0##_##R1:
+
+#define EB(N, L, R, R0, R1) \
+ movi %R0 L \
+ movi %R1 R \
+ b##N##r N##r_##R0##_##R1 %R0 %R1 \
+ calli @abort \
+N##r_##R0##_##R1: \
+ b##N##i N##i_##R0##_##R1 %R0 R \
+ calli @abort \
+N##i_##R0##_##R1: \
+ movi %R0 L \
+ movi %R1 R \
+ N##r %R0 %R0 %R1 \
+ beqi _##N##r_##R0##_##R1 %R0 1 \
+ calli @abort \
+_##N##r_##R0##_##R1: \
+ movi %R0 L \
+ N##i %R1 %R0 R \
+ beqi _##N##i_##R0##_##R1 %R1 1 \
+ calli @abort \
+_##N##i_##R0##_##R1:
+
+#define XEB(N, L, R, R0, R1) \
+ movi %R0 L \
+ movi %R1 R \
+ b##N##r N##r_##R0##_##R1 %R0 %R1 \
+ calli @abort \
+N##r_##R0##_##R1: \
+ b##N##i N##i_##R0##_##R1 %R0 R \
+ calli @abort \
+N##i_##R0##_##R1:
+
+#define XBOP(N, Ls, Rs, Lu, Ru, R0, R1) \
+ movi %R0 Ls \
+ movi %R1 Rs \
+ b##N##r N##r_##R0##_##R1 %R0 %R1 \
+ calli @abort \
+N##r_##R0##_##R1: \
+ movi %R0 Ls \
+ b##N##i N##i_##R0##_##R1 %R0 Rs \
+ calli @abort \
+N##i_##R0##_##R1: \
+ movi %R0 Lu \
+ movi %R1 Ru \
+ b##N##r_u N##r_u_##R0##_##R1 %R0 %R1 \
+ calli @abort \
+N##r_u_##R0##_##R1: \
+ movi %R0 Lu \
+ b##N##i_u N##i_u_##R0##_##R1 %R0 Ru \
+ calli @abort \
+N##i_u_##R0##_##R1:
+
+#define BOPI(N, Ls, Rs, Lu, Ru) \
+ BOP(N, Ls, Rs, Lu, Ru, v0, v1) \
+ BOP(N, Ls, Rs, Lu, Ru, v0, v2) \
+ BOP(N, Ls, Rs, Lu, Ru, v0, r0) \
+ BOP(N, Ls, Rs, Lu, Ru, v0, r1) \
+ BOP(N, Ls, Rs, Lu, Ru, v0, r2) \
+ BOP(N, Ls, Rs, Lu, Ru, v1, v0) \
+ BOP(N, Ls, Rs, Lu, Ru, v1, v2) \
+ BOP(N, Ls, Rs, Lu, Ru, v1, r0) \
+ BOP(N, Ls, Rs, Lu, Ru, v1, r1) \
+ BOP(N, Ls, Rs, Lu, Ru, v1, r2) \
+ BOP(N, Ls, Rs, Lu, Ru, v2, v0) \
+ BOP(N, Ls, Rs, Lu, Ru, v2, v1) \
+ BOP(N, Ls, Rs, Lu, Ru, v2, r0) \
+ BOP(N, Ls, Rs, Lu, Ru, v2, r1) \
+ BOP(N, Ls, Rs, Lu, Ru, v2, r2) \
+ BOP(N, Ls, Rs, Lu, Ru, r0, v0) \
+ BOP(N, Ls, Rs, Lu, Ru, r0, v1) \
+ BOP(N, Ls, Rs, Lu, Ru, r0, v2) \
+ BOP(N, Ls, Rs, Lu, Ru, r0, r1) \
+ BOP(N, Ls, Rs, Lu, Ru, r0, r2) \
+ BOP(N, Ls, Rs, Lu, Ru, r1, v0) \
+ BOP(N, Ls, Rs, Lu, Ru, r1, v1) \
+ BOP(N, Ls, Rs, Lu, Ru, r1, v2) \
+ BOP(N, Ls, Rs, Lu, Ru, r1, r0) \
+ BOP(N, Ls, Rs, Lu, Ru, r1, r2) \
+ BOP(N, Ls, Rs, Lu, Ru, r2, v0) \
+ BOP(N, Ls, Rs, Lu, Ru, r2, v1) \
+ BOP(N, Ls, Rs, Lu, Ru, r2, v2) \
+ BOP(N, Ls, Rs, Lu, Ru, r2, r0) \
+ BOP(N, Ls, Rs, Lu, Ru, r2, r1)
+
+#define EBI(N, L, R) \
+ EB(N, L, R, v0, v1) \
+ EB(N, L, R, v0, v2) \
+ EB(N, L, R, v0, r0) \
+ EB(N, L, R, v0, r1) \
+ EB(N, L, R, v0, r2) \
+ EB(N, L, R, v1, v0) \
+ EB(N, L, R, v1, v2) \
+ EB(N, L, R, v1, r0) \
+ EB(N, L, R, v1, r1) \
+ EB(N, L, R, v1, r2) \
+ EB(N, L, R, v2, v0) \
+ EB(N, L, R, v2, v1) \
+ EB(N, L, R, v2, r0) \
+ EB(N, L, R, v2, r1) \
+ EB(N, L, R, v2, r2) \
+ EB(N, L, R, r0, v0) \
+ EB(N, L, R, r0, v1) \
+ EB(N, L, R, r0, v2) \
+ EB(N, L, R, r0, r1) \
+ EB(N, L, R, r0, r2) \
+ EB(N, L, R, r1, v0) \
+ EB(N, L, R, r1, v1) \
+ EB(N, L, R, r1, v2) \
+ EB(N, L, R, r1, r0) \
+ EB(N, L, R, r1, r2) \
+ EB(N, L, R, r2, v0) \
+ EB(N, L, R, r2, v1) \
+ EB(N, L, R, r2, v2) \
+ EB(N, L, R, r2, r0) \
+ EB(N, L, R, r2, r1)
+
+
+#define XEBI(N, L, R) \
+ XEB(N, L, R, v0, v1) \
+ XEB(N, L, R, v0, v2) \
+ XEB(N, L, R, v0, r0) \
+ XEB(N, L, R, v0, r1) \
+ XEB(N, L, R, v0, r2) \
+ XEB(N, L, R, v1, v0) \
+ XEB(N, L, R, v1, v2) \
+ XEB(N, L, R, v1, r0) \
+ XEB(N, L, R, v1, r1) \
+ XEB(N, L, R, v1, r2) \
+ XEB(N, L, R, v2, v0) \
+ XEB(N, L, R, v2, v1) \
+ XEB(N, L, R, v2, r0) \
+ XEB(N, L, R, v2, r1) \
+ XEB(N, L, R, v2, r2) \
+ XEB(N, L, R, r0, v0) \
+ XEB(N, L, R, r0, v1) \
+ XEB(N, L, R, r0, v2) \
+ XEB(N, L, R, r0, r1) \
+ XEB(N, L, R, r0, r2) \
+ XEB(N, L, R, r1, v0) \
+ XEB(N, L, R, r1, v1) \
+ XEB(N, L, R, r1, v2) \
+ XEB(N, L, R, r1, r0) \
+ XEB(N, L, R, r1, r2) \
+ XEB(N, L, R, r2, v0) \
+ XEB(N, L, R, r2, v1) \
+ XEB(N, L, R, r2, v2) \
+ XEB(N, L, R, r2, r0) \
+ XEB(N, L, R, r2, r1)
+
+#define XBOPI(N, Ls, Rs, Lu, Ru) \
+ XBOP(N, Ls, Rs, Lu, Ru, v0, v1) \
+ XBOP(N, Ls, Rs, Lu, Ru, v0, v2) \
+ XBOP(N, Ls, Rs, Lu, Ru, v0, r0) \
+ XBOP(N, Ls, Rs, Lu, Ru, v0, r1) \
+ XBOP(N, Ls, Rs, Lu, Ru, v0, r2) \
+ XBOP(N, Ls, Rs, Lu, Ru, v1, v0) \
+ XBOP(N, Ls, Rs, Lu, Ru, v1, v2) \
+ XBOP(N, Ls, Rs, Lu, Ru, v1, r0) \
+ XBOP(N, Ls, Rs, Lu, Ru, v1, r1) \
+ XBOP(N, Ls, Rs, Lu, Ru, v1, r2) \
+ XBOP(N, Ls, Rs, Lu, Ru, v2, v0) \
+ XBOP(N, Ls, Rs, Lu, Ru, v2, v1) \
+ XBOP(N, Ls, Rs, Lu, Ru, v2, r0) \
+ XBOP(N, Ls, Rs, Lu, Ru, v2, r1) \
+ XBOP(N, Ls, Rs, Lu, Ru, v2, r2) \
+ XBOP(N, Ls, Rs, Lu, Ru, r0, v0) \
+ XBOP(N, Ls, Rs, Lu, Ru, r0, v1) \
+ XBOP(N, Ls, Rs, Lu, Ru, r0, v2) \
+ XBOP(N, Ls, Rs, Lu, Ru, r0, r1) \
+ XBOP(N, Ls, Rs, Lu, Ru, r0, r2) \
+ XBOP(N, Ls, Rs, Lu, Ru, r1, v0) \
+ XBOP(N, Ls, Rs, Lu, Ru, r1, v1) \
+ XBOP(N, Ls, Rs, Lu, Ru, r1, v2) \
+ XBOP(N, Ls, Rs, Lu, Ru, r1, r0) \
+ XBOP(N, Ls, Rs, Lu, Ru, r1, r2) \
+ XBOP(N, Ls, Rs, Lu, Ru, r2, v0) \
+ XBOP(N, Ls, Rs, Lu, Ru, r2, v1) \
+ XBOP(N, Ls, Rs, Lu, Ru, r2, v2) \
+ XBOP(N, Ls, Rs, Lu, Ru, r2, r0) \
+ XBOP(N, Ls, Rs, Lu, Ru, r2, r1)
+
+#define TBOPF(N, T, L, R) \
+ movi_##T %f0 L \
+ movi_##T %f1 R \
+ b##N##r##_##T N##r_##T %f0 %f1 \
+ calli @abort \
+N##r_##T: \
+ b##N##i##_##T N##i_##T %f0 R \
+ calli @abort \
+N##i_##T: \
+ movi_##T %f1 $NaN \
+ b##N##r##_##T N##r_##T##_##u %f0 %f1 \
+ jmpi N##r_##T##_##u0 \
+N##r_##T##_##u: \
+ calli @abort \
+N##r##_##T##_##u0: \
+ b##N##i##_##T N##i_##T##_##u %f0 $NaN \
+ jmpi N##i_##T##_##u0 \
+N##i##_##T##_##u: \
+ calli @abort \
+N##i##_##T##_##u0:
+#define BOPF(N, L, R) \
+ TBOPF(N, f, L, R) \
+ TBOPF(N, d, L, R)
+
+#define TUBOPF(N, T, L, R) \
+ movi_##T %f0 L \
+ movi_##T %f1 R \
+ b##N##r##_##T N##r_##T %f0 %f1 \
+ calli @abort \
+N##r_##T: \
+ b##N##i##_##T N##i_##T %f0 R \
+ calli @abort \
+N##i_##T: \
+ movi_##T %f1 $NaN \
+ b##N##r##_##T N##r_##T##_##u %f0 %f1 \
+ calli @abort \
+N##r_##T##_##u: \
+ b##N##i##_##T N##i_##T##_##u %f0 $NaN \
+ calli @abort \
+N##i##_##T##_##u:
+
+#define UBOPF(N, L, R) \
+ TUBOPF(N, f, L, R) \
+ TUBOPF(N, d, L, R)
+
+.code
+ prolog
+
+ movi %r0 -1
+ movi %r1 1
+ bltr xltr_r0_r1 %r0 %r1
+ calli @abort
+xltr_r0_r1:
+ blti xlti_r0_r1 %r0 1
+ calli @abort
+xlti_r0_r1:
+ movi %r0 1
+ movi %r1 -1
+ bltr_u xltru_r0_r1 %r0 %r1
+ calli @abort
+xltru_r0_r1:
+ blti_u xltiu_r0_r1 %r0 -1
+ calli @abort
+xltiu_r0_r1:
+ movi %r0 -1
+ movi %r1 -1
+ bler xler_r0_r1 %r0 %r1
+ calli @abort
+xler_r0_r1:
+ blei xlei_r0_r1 %r0 -1 /* le (not lt): equal operands must branch */
+ calli @abort
+xlei_r0_r1:
+ movi %r0 1
+ movi %r1 -1
+ bler_u xlteu_r0_r1 %r0 %r1 /* unsigned le: 1 <= all-ones must branch */
+ calli @abort
+xlteu_r0_r1:
+ blei_u xleiu_r0_r1 %r0 -1
+ calli @abort
+xleiu_r0_r1:
+ movi %r0 32
+ movi %r1 32
+ beqr xeqr_r0_r1 %r0 %r1
+ calli @abort
+xeqr_r0_r1:
+ beqi xeqi_r0_r1 %r0 32
+ calli @abort
+xeqi_r0_r1:
+ movi %r0 -2
+ movi %r1 -2
+ bger xger_r0_r1 %r0 %r1
+ calli @abort
+xger_r0_r1:
+ bgei xgei_r0_r1 %r0 -2
+ calli @abort
+xgei_r0_r1:
+ movi %r0 2
+ movi %r1 2
+ bger_u xgeru_r0_r1 %r0 %r1
+ calli @abort
+xgeru_r0_r1:
+ bgei_u xgeiu_r0_r1 %r0 2
+ calli @abort
+xgeiu_r0_r1:
+ movi %r0 2
+ movi %r1 -2
+ bgtr xgtr_r0_r1 %r0 %r1
+ calli @abort
+xgtr_r0_r1:
+ bgti xgti_r0_r1 %r0 -2
+ calli @abort
+xgti_r0_r1:
+ movi %r0 -2
+ movi %r1 2
+ bgtr_u xgtru_r0_r1 %r0 %r1
+ calli @abort
+xgtru_r0_r1:
+ bgti_u xgtiu_r0_r1 %r0 2
+ calli @abort
+xgtiu_r0_r1:
+ movi %r0 -3
+ movi %r1 3
+ bner xner_r0_r1 %r0 %r1
+ calli @abort
+xner_r0_r1:
+ bnei xnei_r0_r1 %r0 3
+ calli @abort
+xnei_r0_r1:
+ movi %r0 1
+ movi %r1 3
+ bmsr xmsr_r0_r1 %r0 %r1
+ calli @abort
+xmsr_r0_r1:
+ bmsi xmsi_r0_r1 %r0 3
+ calli @abort
+xmsi_r0_r1:
+ movi %r0 1
+ movi %r1 2
+ bmcr xmcr_r0_r1 %r0 %r1
+ calli @abort
+xmcr_r0_r1:
+ bmci xmci_r0_r1 %r0 2
+ calli @abort
+xmci_r0_r1:
+ movi %r0 I7f
+ movi %r1 1
+ boaddr xoaddr_r0_r1 %r0 %r1
+ calli @abort
+xoaddr_r0_r1:
+ movi %r0 Iff
+ movi %r1 1
+ boaddr_u xoaddr_u_r0_r1 %r0 %r1
+ calli @abort
+xoaddr_u_r0_r1:
+ movi %r0 I7f
+ boaddi xoaddi_r0_r1 %r0 1
+ calli @abort
+xoaddi_r0_r1:
+ movi %r0 Iff
+ boaddi_u xoaddi_u_r0_r1 %r0 1
+ calli @abort
+xoaddi_u_r0_r1:
+ movi %r0 I80
+ movi %r1 1
+ bxaddr xxaddr_r0_r1 %r0 %r1
+ calli @abort
+xxaddr_r0_r1:
+ movi %r0 I80
+ bxaddi xxaddi_r0_r1 %r0 1
+ calli @abort
+xxaddi_r0_r1:
+ movi %r0 I7f
+ movi %r1 1
+ bxaddr_u xxaddr_u_r0_r1 %r0 %r1
+ calli @abort
+xxaddr_u_r0_r1:
+ movi %r0 I7f
+ bxaddi_u xxaddi_u_r0_r1 %r0 1
+ calli @abort
+xxaddi_u_r0_r1:
+ movi %r0 I80
+ movi %r1 1
+ bosubr xosubr_r0_r1 %r0 %r1
+ calli @abort
+xosubr_r0_r1:
+ movi %r0 0
+ movi %r1 1
+ bosubr_u xosubr_u_r0_r1 %r0 %r1
+ calli @abort
+xosubr_u_r0_r1:
+ movi %r0 I80
+ bosubi xosubi_r0_r1 %r0 1
+ calli @abort
+xosubi_r0_r1:
+ movi %r0 0
+ bosubi_u xosubi_u_r0_r1 %r0 1
+ calli @abort
+xosubi_u_r0_r1:
+ movi %r0 I81
+ movi %r1 1
+ bxsubr xxsubr_r0_r1 %r0 %r1
+ calli @abort
+xxsubr_r0_r1:
+ movi %r0 I81
+ bxsubi xxsubi_r0_r1 %r0 1
+ calli @abort
+xxsubi_r0_r1:
+ movi %r0 I80
+ movi %r1 1
+ bxsubr_u xxsubr_u_r0_r1 %r0 %r1
+ calli @abort
+xxsubr_u_r0_r1:
+ movi %r0 I80
+ bxsubi_u xxsubi_u_r0_r1 %r0 1
+ calli @abort
+xxsubi_u_r0_r1:
+ movi_f %f0 1
+ movi_f %f1 2
+ bltr_f xltr_f_f0_f1 %f0 %f1
+ calli @abort
+xltr_f_f0_f1:
+ blti_f xlti_f_f0_f1 %f0 2
+ calli @abort
+xlti_f_f0_f1:
+ movi_f %f0 -1
+ movi_f %f1 -1
+ bler_f xler_f_f0_f1 %f0 %f1
+ calli @abort
+xler_f_f0_f1:
+ blei_f xlei_f_f0_f1 %f0 -1
+ calli @abort
+xlei_f_f0_f1:
+ movi_f %f0 -2
+ movi_f %f1 -2
+ beqr_f xeqr_f_f0_f1 %f0 %f1
+ calli @abort
+xeqr_f_f0_f1:
+ beqi_f xeqi_f_f0_f1 %f0 -2
+ calli @abort
+xeqi_f_f0_f1:
+ movi_f %f0 -3
+ movi_f %f1 -3
+ bger_f xger_f_f0_f1 %f0 %f1
+ calli @abort
+xger_f_f0_f1:
+ bgei_f xgei_f_f0_f1 %f0 -3
+ calli @abort
+xgei_f_f0_f1:
+ movi_f %f0 2
+ movi_f %f1 1
+ bgtr_f xgtr_f_f0_f1 %f0 %f1
+ calli @abort
+xgtr_f_f0_f1:
+ bgti_f xgti_f_f0_f1 %f0 1
+ calli @abort
+xgti_f_f0_f1:
+ movi_f %f0 0
+ movi_f %f1 2
+ bner_f xner_f_f0_f1 %f0 %f1
+ calli @abort
+xner_f_f0_f1:
+ bnei_f xnei_f_f0_f1 %f0 2
+ calli @abort
+xnei_f_f0_f1:
+
+ BOPI(lt, -1, 1, 1, -1)
+ BOPI(le, -1, -1, 1, 1)
+ EBI(eq, 32, 32)
+ BOPI(ge, -2, -2, 2, 2)
+ BOPI(gt, 2, -2, -2, 2)
+ EBI(ne, 3, -3)
+ XEBI(ms, 1, 3)
+ XEBI(mc, 1, 2)
+ XBOPI(oadd, I7f, 1, Iff, 1)
+ XBOPI(xadd, I80, 1, I7f, 1)
+ XBOPI(osub, I80, 1, 0, 1)
+ XBOPI(xsub, I81, 1, I80, 1)
+ BOPF(lt, 1, 2)
+ BOPF(le, 2, 2)
+ BOPF(eq, 3, 3)
+ BOPF(ge, 3, 3)
+ BOPF(gt, 4, 3)
+ BOPF(ne, 4, 3)
+ UBOPF(unlt, 1, 2)
+ UBOPF(unle, 2, 2)
+ UBOPF(uneq, 3, 3)
+ UBOPF(unge, 3, 3)
+ UBOPF(ungt, 4, 3)
+ BOPF(ltgt, 4, 3)
+ movi_f %f0 5
+ movi_f %f1 5
+ bordr_f ordr_f %f0 %f1
+ calli @abort
+ordr_f:
+ bordi_f ordi_f %f0 1
+ calli @abort
+ordi_f:
+ bordi_f ordi_f_u %f0 $NaN
+ jmpi ordi_f_u0
+ordi_f_u:
+ calli @abort
+ordi_f_u0:
+ movi_f %f0 5
+ movi_f %f1 5
+ bunordr_f unordr_f %f0 %f1
+ jmpi unordr_f_0
+unordr_f:
+ calli @abort
+unordr_f_0:
+ bunordi_f unordi_f %f0 1
+ jmpi unordi_f_0
+unordi_f:
+ calli @abort
+unordi_f_0:
+ bunordi_f unordi_f_1 %f0 $NaN
+ calli @abort
+unordi_f_1:
+
+ // printed only to confirm the test did not crash or abort
+ prepare 1
+ pushargi ok
+ finishi @printf
+
+ ret
+ epilog
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index afbfd10..d2db6c2 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -44,9 +44,15 @@
#define jit_size(vector) (sizeof(vector) / sizeof((vector)[0]))
+#define jit_reg_free_p(regno) \
+ (!jit_regset_tstbit(_jit->reglive, regno) && \
+ !jit_regset_tstbit(_jit->regarg, regno) && \
+ !jit_regset_tstbit(_jit->regsav, regno))
+
/*
* Private jit_class bitmasks
*/
+#define jit_class_named 0x00400000 /* hint: must be the named reg */
#define jit_class_nospill 0x00800000 /* hint to fail if need spill */
#define jit_class_sft 0x01000000 /* not a hardware register */
#define jit_class_rg8 0x04000000 /* x86 8 bits */
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index 948f260..1922a6c 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -409,7 +409,7 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused;
# define CC_MVN(cc,rd,rm) corrr(cc,ARM_MVN,0,rd,rm)
# define MVN(rd,rm) CC_MVN(ARM_CC_AL,rd,rm)
# define T1_MVN(rd,rm) is(THUMB_MVN|(_u3(rm)<<3)|_u3(rd))
-# define T2_MVN(rd,rm) torrr(THUMB2_MVN,rd,_R15_REGNO,rm)
+# define T2_MVN(rd,rm) torrr(THUMB2_MVN,_R15_REGNO,rd,rm)
# define CC_MVNI(cc,rd,im) corri(cc,ARM_MVN|ARM_I,0,rd,im)
# define MVNI(rd,im) CC_MVNI(ARM_CC_AL,rd,im)
# define T2_MVNI(rd,im) torri(THUMB2_MVNI,_R15_REGNO,rd,im)
@@ -816,10 +816,10 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused;
# define T2_POP(im) tpp(THUMB2_POP,im)
# define jit_get_reg_args() \
do { \
- (void)jit_get_reg(_R0|jit_class_gpr); \
- (void)jit_get_reg(_R1|jit_class_gpr); \
- (void)jit_get_reg(_R2|jit_class_gpr); \
- (void)jit_get_reg(_R3|jit_class_gpr); \
+ (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr); \
+ (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr); \
+ (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr); \
+ (void)jit_get_reg(_R3|jit_class_named|jit_class_gpr); \
} while (0)
# define jit_unget_reg_args() \
do { \
diff --git a/lib/jit_arm-swf.c b/lib/jit_arm-swf.c
index cc311df..ad0d12d 100644
--- a/lib/jit_arm-swf.c
+++ b/lib/jit_arm-swf.c
@@ -156,8 +156,8 @@ static void _swf_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define swf_muli_d(r0,r1,i0) swf_ddd_(__aeabi_dmul,r0,r1,i0)
# define swf_divr_f(r0,r1,r2) swf_fff(__aeabi_fdiv,r0,r1,r2)
# define swf_divi_f(r0,r1,i0) swf_fff_(__aeabi_fdiv,r0,r1,i0)
-# define swf_divr_d(r0,r1,r2) swf_ddd(__aeabi_dsub,r0,r1,r2)
-# define swf_divi_d(r0,r1,i0) swf_ddd_(__aeabi_dsub,r0,r1,i0)
+# define swf_divr_d(r0,r1,r2) swf_ddd(__aeabi_ddiv,r0,r1,r2)
+# define swf_divi_d(r0,r1,i0) swf_ddd_(__aeabi_ddiv,r0,r1,i0)
# define swf_ltr_f(r0,r1,r2) swf_iff(__aeabi_fcmplt,r0,r1,r2)
# define swf_lti_f(r0,r1,i0) swf_iff_(__aeabi_fcmplt,r0,r1,i0)
# define swf_ltr_d(r0,r1,r2) swf_idd(__aeabi_dcmplt,r0,r1,r2)
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index 319703e..b706440 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -37,10 +37,6 @@
#define jit_exchange_p() 1
/* FIXME is it really required to not touch _R10? */
-#define jit_reg_free_p(regno) \
- (!jit_regset_tstbit(_jit->reglive, regno) && \
- !jit_regset_tstbit(_jit->regarg, regno) && \
- !jit_regset_tstbit(_jit->regsav, regno))
/*
* Types
@@ -290,28 +286,28 @@ _jit_reti(jit_state_t *_jit, jit_word_t u)
void
_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
{
- jit_movr_f(JIT_RET, u);
+ jit_movr_f(JIT_FRET, u);
jit_ret();
}
void
_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
{
- jit_movi_f(JIT_RET, u);
+ jit_movi_f(JIT_FRET, u);
jit_ret();
}
void
_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
{
- jit_movr_d(JIT_RET, u);
+ jit_movr_d(JIT_FRET, u);
jit_ret();
}
void
_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
{
- jit_movi_d(JIT_RET, u);
+ jit_movi_d(JIT_FRET, u);
jit_ret();
}
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index bef8d4c..ed02008 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -40,7 +40,7 @@
# define stxi(u, v, w) stxi_l(u, v, w)
# define can_sign_extend_int_p(im) \
(((im) >= 0 && (long)(im) <= 0x7fffffffL) || \
- ((im) < 0 && (long)(im) >= -0x80000000L))
+ ((im) < 0 && (long)(im) > -0x80000000L))
# define can_zero_extend_int_p(im) \
((im) >= 0 && (im) < 0x80000000L)
# define fits_uint32_p(im) ((im & 0xffffffff00000000L) == 0)
@@ -975,8 +975,8 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
lea(-i0, r1, _NOREG, _SCL1, r0);
}
else if (r0 != r1) {
- movi(r0, i0);
- isubr(r0, r1);
+ movi(r0, -i0);
+ iaddr(r0, r1);
}
else {
reg = jit_get_reg(jit_class_gpr);
@@ -1064,7 +1064,7 @@ _imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
if (can_sign_extend_int_p(i0)) {
- rex(0, 1, r1, _NOREG, r0);
+ rex(0, 1, r0, _NOREG, r1);
if ((jit_int8_t)i0 == i0) {
ic(0x6b);
mrm(0x03, r7(r0), r7(r1));
@@ -1147,16 +1147,25 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
{
jit_int32_t div;
jit_int32_t reg;
-
- if (r0 != _RDX_REGNO)
- (void)jit_get_reg(_RDX|jit_class_gpr);
- if (r0 != _RAX_REGNO)
- (void)jit_get_reg(_RAX|jit_class_gpr);
+ jit_int32_t set;
+ jit_int32_t use;
+
+ set = use = 0;
+ if (r0 != _RDX_REGNO && r1 != _RDX_REGNO && r2 != _RDX_REGNO)
+ set |= 1 << _RDX_REGNO;
+ if (r0 != _RAX_REGNO && r1 != _RAX_REGNO && r2 != _RAX_REGNO)
+ set |= 1 << _RAX_REGNO;
+ if (set & (1 <<_RDX_REGNO))
+ (void)jit_get_reg(_RDX|jit_class_gpr|jit_class_named);
+ if (set & (1 << _RAX_REGNO))
+ (void)jit_get_reg(_RAX|jit_class_gpr|jit_class_named);
if (r2 == _RAX_REGNO) {
if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
- reg = jit_get_reg(r1 == _RCX_REGNO ? _RBX : _RCX);
+ reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
+ jit_class_gpr|jit_class_named);
+ use = 1;
div = rn(reg);
movr(div, _RAX_REGNO);
if (r1 != _RAX_REGNO)
@@ -1172,13 +1181,14 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
movr(_RAX_REGNO, r1);
}
div = r0;
- reg = 0;
}
}
else if (r2 == _RDX_REGNO) {
if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
- reg = jit_get_reg(r1 == _RCX_REGNO ? _RBX : _RCX);
+ reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
+ jit_class_gpr|jit_class_named);
+ use = 1;
div = rn(reg);
movr(div, _RDX_REGNO);
if (r1 != _RAX_REGNO)
@@ -1189,14 +1199,12 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
movr(_RAX_REGNO, r1);
movr(r0, _RDX_REGNO);
div = r0;
- reg = 0;
}
}
else {
if (r1 != _RAX_REGNO)
movr(_RAX_REGNO, r1);
div = r2;
- reg = 0;
}
if (sign) {
@@ -1208,19 +1216,21 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
idivr_u(div);
}
- if (reg)
+ if (use)
jit_unget_reg(reg);
if (r0 != _RAX_REGNO) {
if (divide)
movr(r0, _RAX_REGNO);
- jit_unget_reg(_RAX);
}
if (r0 != _RDX_REGNO) {
if (!divide)
movr(r0, _RDX_REGNO);
- jit_unget_reg(_RDX);
}
+ if (set & (1 <<_RDX_REGNO))
+ jit_unget_reg(_RDX);
+ if (set & (1 << _RAX_REGNO))
+ jit_unget_reg(_RAX);
}
static void
@@ -1229,6 +1239,8 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
{
jit_int32_t reg;
jit_int32_t div;
+ jit_int32_t set;
+ jit_int32_t use;
if (divide) {
switch (i0) {
@@ -1275,23 +1287,28 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
return;
}
- if (r0 != _RDX_REGNO)
- (void)jit_get_reg(_RDX|jit_class_gpr);
- if (r0 != _RAX_REGNO)
- (void)jit_get_reg(_RAX|jit_class_gpr);
+ set = use = 0;
+ if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+ set |= 1 << _RDX_REGNO;
+ if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+ set |= 1 << _RAX_REGNO;
+ if (set & (1 <<_RDX_REGNO))
+ (void)jit_get_reg(_RDX|jit_class_gpr|jit_class_named);
+ if (set & (1 << _RAX_REGNO))
+ (void)jit_get_reg(_RAX|jit_class_gpr|jit_class_named);
- if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
+ if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) {
if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
- reg = jit_get_reg(_RCX);
+ reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
+ jit_class_gpr|jit_class_named);
+ use = 1;
div = rn(reg);
}
- else {
- reg = 0;
+ else
div = r0;
- }
movi(div, i0);
- movr(_RAX, r1);
+ movr(_RAX_REGNO, r1);
if (sign) {
sign_extend_rdx_rax();
@@ -1302,19 +1319,21 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
idivr_u(div);
}
- if (reg)
+ if (use)
jit_unget_reg(reg);
if (r0 != _RAX_REGNO) {
if (divide)
movr(r0, _RAX_REGNO);
- jit_unget_reg(_RAX);
}
if (r0 != _RDX_REGNO) {
if (!divide)
movr(r0, _RDX_REGNO);
- jit_unget_reg(_RDX);
}
+ if (set & (1 <<_RDX_REGNO))
+ jit_unget_reg(_RDX);
+ if (set & (1 << _RAX_REGNO))
+ jit_unget_reg(_RAX);
}
static void
@@ -1386,7 +1405,7 @@ _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
else if (r0 != r1) {
movi(r0, i0);
- ixorr(r0, r1);
+ iorr(r0, r1);
}
else {
reg = jit_get_reg(jit_class_gpr);
@@ -1448,6 +1467,7 @@ _rotshr(jit_state_t *_jit, jit_int32_t code,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_int32_t reg;
+ jit_int32_t use;
if (r0 == _RCX_REGNO) {
reg = jit_get_reg(jit_class_gpr);
@@ -1459,13 +1479,30 @@ _rotshr(jit_state_t *_jit, jit_int32_t code,
jit_unget_reg(reg);
}
else if (r2 != _RCX_REGNO) {
- reg = jit_get_reg(jit_class_gpr);
- movr(rn(reg), _RCX_REGNO);
- movr(_RCX_REGNO, r2);
- movr(r0, r1);
+ use = !jit_reg_free_p(_RCX);
+ if (use) {
+ reg = jit_get_reg(jit_class_gpr);
+ movr(rn(reg), _RCX_REGNO);
+ }
+ else
+ reg = 0;
+ if (r1 == _RCX_REGNO) {
+ if (r0 == r2)
+ xchgr(r0, _RCX_REGNO);
+ else {
+ movr(r0, r1);
+ movr(_RCX_REGNO, r2);
+ }
+ }
+ else {
+ movr(_RCX_REGNO, r2);
+ movr(r0, r1);
+ }
irotshr(code, r0);
- movr(_RCX_REGNO, rn(reg));
- jit_unget_reg(reg);
+ if (use) {
+ movr(_RCX_REGNO, rn(reg));
+ jit_unget_reg(reg);
+ }
}
else {
movr(r0, r1);
diff --git a/lib/lightning.c b/lib/lightning.c
index ac06ffe..69abeeb 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -148,17 +148,18 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec)
jit_int32_t spec;
jit_int32_t regno;
- /* if asking for an explicit register value, assume it will
- * properly handle the case of the register also being an
- * argument for the instruction, or the register value
- * being live */
spec = regspec & ~(jit_class_chk|jit_class_nospill);
- if ((regno = jit_regno(spec))) {
+ if (spec & jit_class_named) {
+ regno = jit_regno(spec);
if (jit_regset_tstbit(_jit->regsav, regno))
/* fail if register is spilled */
goto fail;
- if (jit_regset_tstbit(_jit->regarg, regno)) {
+ if (jit_regset_tstbit(_jit->regarg, regno))
+ /* fail if register is an argument to current instruction */
+ goto fail;
+ if (jit_regset_tstbit(_jit->reglive, regno)) {
if (regspec & jit_class_nospill)
+ /* fail if register is live and should not spill/reload */
goto fail;
goto spill;
}
@@ -1453,6 +1454,12 @@ _thread_jumps(jit_state_t *_jit)
case jit_code_callr: case jit_code_calli:
/* non optimizable jump like code */
break;
+ case jit_code_beqr_f: case jit_code_beqi_f:
+ case jit_code_beqr_d: case jit_code_beqi_d:
+ case jit_code_bltgtr_f: case jit_code_bltgti_f:
+ case jit_code_bltgtr_d: case jit_code_bltgti_d:
+ /* non optimizable jump code */
+ break;
default:
mask = jit_classify(node->code);
if (mask & jit_cc_a0_jmp) {
@@ -1633,8 +1640,10 @@ reverse_jump_code(jit_code_t code)
case jit_code_blti_f: return (jit_code_bungei_f);
case jit_code_bler_f: return (jit_code_bungtr_f);
case jit_code_blei_f: return (jit_code_bungti_f);
+#if 0
case jit_code_beqr_f: return (jit_code_bltgtr_f);
case jit_code_beqi_f: return (jit_code_bltgti_f);
+#endif
case jit_code_bger_f: return (jit_code_bunltr_f);
case jit_code_bgei_f: return (jit_code_bunlti_f);
case jit_code_bgtr_f: return (jit_code_bunler_f);
@@ -1651,8 +1660,10 @@ reverse_jump_code(jit_code_t code)
case jit_code_bungei_f: return (jit_code_blti_f);
case jit_code_bungtr_f: return (jit_code_bler_f);
case jit_code_bungti_f: return (jit_code_blei_f);
+#if 0
case jit_code_bltgtr_f: return (jit_code_beqr_f);
case jit_code_bltgti_f: return (jit_code_beqi_f);
+#endif
case jit_code_bordr_f: return (jit_code_bunordr_f);
case jit_code_bordi_f: return (jit_code_bunordi_f);
case jit_code_bunordr_f:return (jit_code_bordr_f);
generated by cgit v1.2.3 (git 2.39.1) at 2025年09月27日 22:17:15 +0000

AltStyle によって変換されたページ (->オリジナル) /