author | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2013-01-05 16:14:59 -0200 |
---|---|---|
committer | pcpa <paulo.cesar.pereira.de.andrade@gmail.com> | 2013-01-05 16:14:59 -0200 |
commit | 9d2566ee0ae56ea9b1771eba17e72db8a206d2f1 (patch) | |
tree | aad6fc36f54e3beed00c06c5d400332721735b42 | |
parent | d7f94379ed7dba2387471957bd584cc71a0e9046 (diff) | |
download | lightning-9d2566ee0ae56ea9b1771eba17e72db8a206d2f1.tar.gz |
-rw-r--r-- | ChangeLog | 47 | ||||
-rw-r--r-- | check/Makefile.am | 11 | ||||
-rw-r--r-- | check/ccall.c | 894 | ||||
-rw-r--r-- | check/ccall.ok | 1 | ||||
-rw-r--r-- | include/lightning.h | 2 | ||||
-rw-r--r-- | include/lightning/jit_ppc.h | 34 | ||||
-rw-r--r-- | include/lightning/jit_private.h | 1 | ||||
-rw-r--r-- | lib/jit_arm.c | 1 | ||||
-rw-r--r-- | lib/jit_mips.c | 137 | ||||
-rw-r--r-- | lib/jit_ppc-cpu.c | 22 | ||||
-rw-r--r-- | lib/jit_ppc.c | 173 | ||||
-rw-r--r-- | lib/jit_x86.c | 13 | ||||
-rw-r--r-- | lib/lightning.c | 53 |
@@ -1,3 +1,50 @@ +2013-01-05 Paulo Andrade <pcpa@gnu.org> + + * check/ccall.c, check/ccall.ok: New test case to validate + interleaved calls from/to C code and jit. + + * check/Makefile.am: Update for the new ccall test case. + + * include/lightning.h, lib/lightning.c: Add the new jit_address + call that returns the real/final address of a "note" in the + generated jit. It requires a jit_node_t as returned by the + jit_note call, and is only valid after calling jit_emit. + Add an intermediate solution to properly handle arm + soft and softfp modes that move a double to an integer register + pair. Currently it just adds extra tests for the condition, + but the proper solution should be to have extra lightning + codes for these conditions, codes which should be only used + by the backends that need it, and merged with the existing + jit_pusharg*_{f,d}. + + * include/lightning/jit_private.h: Add new jit_state_t flag + to know it finished jit_emit, so that calls to jit_address + are valid. + + * lib/jit_mips.c: Correct abi implementation so that the + new ccall test case passes. Major problem was using + _jit->function.self.arg{i,f} as boolean values, but that + would cause lightning.c:patch_registers() to incorrectly + assume only one register was used as argument when calling + jit_regarg_p(); _jit->function.self.arg{i,f} must be the + number of registers used as arguments (in all backends). + + * lib/jit_x86.c: Add workaround, by marking %rax as used, + to a special condition, when running out of registers and the + allocator trying to spill and reload %rax, but %rax was used + as a pointer to a function, which would cause the reload to + destroy the return value. This condition can be better + generalized, but the current solution is good enough. 
+ + * include/lightning/jit_ppc.h, lib/jit_ppc-cpu.c, lib/jit_ppc.c: + Rewrite logic to handle arguments, as the original code was + written based on a SysV pdf about the generic powerpc ABI, + what did "invent" a new abi for the previous test cases, but + failed in the new ccall test in Darwin PPC. Now it properly + handles 13 float registers for arguments, as well as proper + computation of stack offsets when running out of registers + for arguments. + 2013年01月02日 Paulo Andrade <pcpa@gnu.org> * check/float.tst: Correct test case to match ppc also diff --git a/check/Makefile.am b/check/Makefile.am index 70bb439..8324c36 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -14,16 +14,17 @@ AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE -check_PROGRAMS = lightning +check_PROGRAMS = lightning ccall lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +lightning_SOURCES = lightning.c + +ccall_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +ccall_SOURCES = ccall.c $(top_builddir)/lib/liblightning.la: cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la -lightning_SOURCES = \ - lightning.c - EXTRA_DIST = \ 3to2.tst 3to2.ok \ add.tst add.ok \ @@ -65,6 +66,7 @@ EXTRA_DIST = \ carry.tst carry.ok \ call.tst call.ok \ float.tst float.ok \ + ccall.ok \ check.sh \ check.x87.sh \ check.arm.sh check.swf.sh \ @@ -114,6 +116,7 @@ $(swf_TESTS): check.swf.sh TESTS += $(swf_TESTS) endif +TESTS += ccall CLEANFILES = $(TESTS) TESTS_ENVIRONMENT=$(srcdir)/run-test diff --git a/check/ccall.c b/check/ccall.c new file mode 100644 index 0000000..1ff6b2a --- /dev/null +++ b/check/ccall.c @@ -0,0 +1,894 @@ +#include <lightning.h> +#include <stdio.h> + +#define _w0 0 +#define _w1 1 +#define _w2 (_w1-2) +#define _w3 (_w2-3) +#define _w4 (_w3-4) +#define _w5 (_w4-5) +#define _w6 (_w5-6) +#define _w7 (_w6-7) +#define _w8 (_w7-8) +#define _w9 (_w8-9) +#define _w10 (_w9-10) +#define _w11 (_w10-11) +#define _w12 (_w11-12) +#define _w13 (_w12-13) +#define _w14 
(_w13-14) +#define _w15 (_w14-15) +#define _c0 _w0 +#define _c1 _w1 +#define _c2 _w2 +#define _c3 _w3 +#define _c4 _w4 +#define _c5 _w5 +#define _c6 _w6 +#define _c7 _w7 +#define _c8 _w8 +#define _c9 _w9 +#define _c10 _w10 +#define _c11 _w11 +#define _c12 _w12 +#define _c13 _w13 +#define _c14 _w14 +#define _c15 _w15 +#define _uc0 (_w0&0xff) +#define _uc1 (_w1&0xff) +#define _uc2 (_w2&0xff) +#define _uc3 (_w3&0xff) +#define _uc4 (_w4&0xff) +#define _uc5 (_w5&0xff) +#define _uc6 (_w6&0xff) +#define _uc7 (_w7&0xff) +#define _uc8 (_w8&0xff) +#define _uc9 (_w9&0xff) +#define _uc10 (_w10&0xff) +#define _uc11 (_w11&0xff) +#define _uc12 (_w12&0xff) +#define _uc13 (_w13&0xff) +#define _uc14 (_w14&0xff) +#define _uc15 (_w15&0xff) +#define _s0 _w0 +#define _s1 _w1 +#define _s2 _w2 +#define _s3 _w3 +#define _s4 _w4 +#define _s5 _w5 +#define _s6 _w6 +#define _s7 _w7 +#define _s8 _w8 +#define _s9 _w9 +#define _s10 _w10 +#define _s11 _w11 +#define _s12 _w12 +#define _s13 _w13 +#define _s14 _w14 +#define _s15 _w15 +#define _us0 (_w0&0xffff) +#define _us1 (_w1&0xffff) +#define _us2 (_w2&0xffff) +#define _us3 (_w3&0xffff) +#define _us4 (_w4&0xffff) +#define _us5 (_w5&0xffff) +#define _us6 (_w6&0xffff) +#define _us7 (_w7&0xffff) +#define _us8 (_w8&0xffff) +#define _us9 (_w9&0xffff) +#define _us10 (_w10&0xffff) +#define _us11 (_w11&0xffff) +#define _us12 (_w12&0xffff) +#define _us13 (_w13&0xffff) +#define _us14 (_w14&0xffff) +#define _us15 (_w15&0xffff) +#define _i0 _w0 +#define _i1 _w1 +#define _i2 _w2 +#define _i3 _w3 +#define _i4 _w4 +#define _i5 _w5 +#define _i6 _w6 +#define _i7 _w7 +#define _i8 _w8 +#define _i9 _w9 +#define _i10 _w10 +#define _i11 _w11 +#define _i12 _w12 +#define _i13 _w13 +#define _i14 _w14 +#define _i15 _w15 +#if __WORDSIZE == 64 +# define _ui0 (_w0&0xffffffff) +# define _ui1 (_w1&0xffffffff) +# define _ui2 (_w2&0xffffffff) +# define _ui3 (_w3&0xffffffff) +# define _ui4 (_w4&0xffffffff) +# define _ui5 (_w5&0xffffffff) +# define _ui6 (_w6&0xffffffff) +# define 
_ui7 (_w7&0xffffffff) +# define _ui8 (_w8&0xffffffff) +# define _ui9 (_w9&0xffffffff) +# define _ui10 (_w10&0xffffffff) +# define _ui11 (_w11&0xffffffff) +# define _ui12 (_w12&0xffffffff) +# define _ui13 (_w13&0xffffffff) +# define _ui14 (_w14&0xffffffff) +# define _ui15 (_w15&0xffffffff) +# define _l0 _w0 +# define _l1 _w1 +# define _l2 _w2 +# define _l3 _w3 +# define _l4 _w4 +# define _l5 _w5 +# define _l6 _w6 +# define _l7 _w7 +# define _l8 _w8 +# define _l9 _w9 +# define _l10 _w10 +# define _l11 _w11 +# define _l12 _w12 +# define _l13 _w13 +# define _l14 _w14 +# define _l15 _w15 +#endif + +/* + * Types + */ +typedef signed char _c; +typedef unsigned char _uc; +typedef signed short _s; +typedef unsigned short _us; +typedef signed int _i; +#if __WORDSIZE == 64 +typedef unsigned int _ui; +typedef signed long _l; +#endif +typedef float _f; +typedef double _d; + +#define prt0(T) T C##T##0(void); +#define prt1(T) prt0(T) \ + T C##T##1(T); +#define prt2(T) prt1(T) \ + T C##T##2(T,T); +#define prt3(T) prt2(T) \ + T C##T##3(T,T,T); +#define prt4(T) prt3(T) \ + T C##T##4(T,T,T,T); +#define prt5(T) prt4(T) \ + T C##T##5(T,T,T,T,T); +#define prt6(T) prt5(T) \ + T C##T##6(T,T,T,T,T,T); +#define prt7(T) prt6(T) \ + T C##T##7(T,T,T,T,T,T,T); +#define prt8(T) prt7(T) \ + T C##T##8(T,T,T,T,T,T,T,T); +#define prt9(T) prt8(T) \ + T C##T##9(T,T,T,T,T,T,T,T,T); +#define prt10(T) prt9(T) \ + T C##T##10(T,T,T,T,T,T,T,T,T,T); +#define prt11(T) prt10(T) \ + T C##T##11(T,T,T,T,T,T,T,T,T,T,T); +#define prt12(T) prt11(T) \ + T C##T##12(T,T,T,T,T,T,T,T,T,T,T,T); +#define prt13(T) prt12(T) \ + T C##T##13(T,T,T,T,T,T,T,T,T,T,T,T,T); +#define prt14(T) prt13(T) \ + T C##T##14(T,T,T,T,T,T,T,T,T,T,T,T,T,T); +#define prt15(T) prt14(T) \ + T C##T##15(T,T,T,T,T,T,T,T,T,T,T,T,T,T,T); +#define prt(T) prt15(T) +prt(_c) +prt(_uc) +prt(_s) +prt(_us) +prt(_i) +#if __WORDSIZE == 64 +prt(_ui) +prt(_l) +#endif +prt(_f) +prt(_d) +#undef prt +#undef prt15 +#undef prt14 +#undef prt13 +#undef prt12 +#undef 
prt11 +#undef prt10 +#undef prt9 +#undef prt8 +#undef prt7 +#undef prt6 +#undef prt5 +#undef prt4 +#undef prt3 +#undef prt2 +#undef prt1 +#undef prt0 + +#define prtn(N,T) T J##T##n(void); +#define prt0(T) prtn(0,T) +#define prt1(T) prt0(T) prtn(1,T) +#define prt2(T) prt1(T) prtn(2,T) +#define prt3(T) prt2(T) prtn(3,T) +#define prt4(T) prt3(T) prtn(4,T) +#define prt5(T) prt4(T) prtn(5,T) +#define prt6(T) prt5(T) prtn(6,T) +#define prt7(T) prt6(T) prtn(7,T) +#define prt8(T) prt7(T) prtn(8,T) +#define prt9(T) prt8(T) prtn(9,T) +#define prt10(T) prt9(T) prtn(10,T) +#define prt11(T) prt10(T) prtn(11,T) +#define prt12(T) prt11(T) prtn(12,T) +#define prt13(T) prt12(T) prtn(13,T) +#define prt14(T) prt13(T) prtn(14,T) +#define prt15(T) prt14(T) prtn(15,T) +#define prt(T) prt15(T) +prt(_c) +prt(_uc) +prt(_s) +prt(_us) +prt(_i) +#if __WORDSIZE == 64 +prt(_ui) +prt(_l) +#endif +prt(_f) +prt(_d) +#undef prt +#undef prt15 +#undef prt14 +#undef prt13 +#undef prt12 +#undef prt11 +#undef prt10 +#undef prt9 +#undef prt8 +#undef prt7 +#undef prt6 +#undef prt5 +#undef prt4 +#undef prt3 +#undef prt2 +#undef prt1 +#undef prt0 +#undef prtn + +/* + * Initialization + */ + +#define dat0(T) T (*j##T##0)(void); \ + jit_node_t *n##T##0; +#define dat1(T) dat0(T) \ + T (*j##T##1)(T); \ + jit_node_t *n##T##1; +#define dat2(T) dat1(T) \ + T (*j##T##2)(T,T); \ + jit_node_t *n##T##2; +#define dat3(T) dat2(T) \ + T (*j##T##3)(T,T,T); \ + jit_node_t *n##T##3; +#define dat4(T) dat3(T) \ + T (*j##T##4)(T,T,T,T); \ + jit_node_t *n##T##4; +#define dat5(T) dat4(T) \ + T (*j##T##5)(T,T,T,T,T); \ + jit_node_t *n##T##5; +#define dat6(T) dat5(T) \ + T (*j##T##6)(T,T,T,T,T,T); \ + jit_node_t *n##T##6; +#define dat7(T) dat6(T) \ + T (*j##T##7)(T,T,T,T,T,T,T); \ + jit_node_t *n##T##7; +#define dat8(T) dat7(T) \ + T (*j##T##8)(T,T,T,T,T,T,T,T); \ + jit_node_t *n##T##8; +#define dat9(T) dat8(T) \ + T (*j##T##9)(T,T,T,T,T,T,T,T,T); \ + jit_node_t *n##T##9; +#define dat10(T) dat9(T) \ + T 
(*j##T##10)(T,T,T,T,T,T,T,T,T,T); \ + jit_node_t *n##T##10; +#define dat11(T) dat10(T) \ + T (*j##T##11)(T,T,T,T,T,T,T,T,T,T,T); \ + jit_node_t *n##T##11; +#define dat12(T) dat11(T) \ + T (*j##T##12)(T,T,T,T,T,T,T,T,T,T,T,T); \ + jit_node_t *n##T##12; +#define dat13(T) dat12(T) \ + T (*j##T##13)(T,T,T,T,T,T,T,T,T,T,T,T,T);\ + jit_node_t *n##T##13; +#define dat14(T) dat13(T) \ + T (*j##T##14)(T,T,T,T,T,T,T,T,T,T,T,T,T,T);\ + jit_node_t *n##T##14; +#define dat15(T) dat14(T) \ + T (*j##T##15)(T,T,T,T,T,T,T,T,T,T,T,T,T,T,T);\ + jit_node_t *n##T##15; +#define dat(T) dat15(T) +dat(_c) +dat(_uc) +dat(_s) +dat(_us) +dat(_i) +#if __WORDSIZE == 64 +dat(_ui) +dat(_l) +#endif +dat(_f) +dat(_d) +#undef dat +#undef dat15 +#undef dat14 +#undef dat13 +#undef dat12 +#undef dat11 +#undef dat10 +#undef dat9 +#undef dat8 +#undef dat7 +#undef dat6 +#undef dat5 +#undef dat4 +#undef dat3 +#undef dat2 +#undef dat1 +#undef dat0 + +/* + * Implementation + */ +#define dcl0(T) \ +T C##T##0(void) \ +{ \ + return (0); \ +} +#define dcl1(T) \ +dcl0(T) \ +T C##T##1(T A) \ +{ \ + return (A); \ +} +#define dcl2(T) \ +dcl1(T) \ +T C##T##2(T A,T B) \ +{ \ + return (A-B); \ +} +#define dcl3(T) \ +dcl2(T) \ +T C##T##3(T A,T B,T C) \ +{ \ + return (A-B-C); \ +} +#define dcl4(T) \ +dcl3(T) \ +T C##T##4(T A,T B,T C,T D) \ +{ \ + return (A-B-C-D); \ +} +#define dcl5(T) \ +dcl4(T) \ +T C##T##5(T A,T B,T C,T D,T E) \ +{ \ + return (A-B-C-D-E); \ +} +#define dcl6(T) \ +dcl5(T) \ +T C##T##6(T A,T B,T C,T D,T E,T F) \ +{ \ + return (A-B-C-D-E-F); \ +} +#define dcl7(T) \ +dcl6(T) \ +T C##T##7(T A,T B,T C,T D,T E,T F,T G) \ +{ \ + return (A-B-C-D-E-F-G); \ +} +#define dcl8(T) \ +dcl7(T) \ +T C##T##8(T A,T B,T C,T D,T E,T F,T G,T H) \ +{ \ + return (A-B-C-D-E-F-G-H); \ +} +#define dcl9(T) \ +dcl8(T) \ +T C##T##9(T A,T B,T C,T D,T E,T F,T G,T H,T I) \ +{ \ + return (A-B-C-D-E-F-G-H-I); \ +} +#define dcl10(T) \ +dcl9(T) \ +T C##T##10(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J) \ +{ \ + return (A-B-C-D-E-F-G-H-I-J); \ 
+} +#define dcl11(T) \ +dcl10(T) \ +T C##T##11(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K) \ +{ \ + return (A-B-C-D-E-F-G-H-I-J-K); \ +} +#define dcl12(T) \ +dcl11(T) \ +T C##T##12(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L) \ +{ \ + return (A-B-C-D-E-F-G-H-I-J-K-L); \ +} +#define dcl13(T) \ +dcl12(T) \ +T C##T##13(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M) \ +{ \ + return (A-B-C-D-E-F-G-H-I-J-K-L-M); \ +} +#define dcl14(T) \ +dcl13(T) \ +T C##T##14(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M,T N) \ +{ \ + return (A-B-C-D-E-F-G-H-I-J-K-L-M-N); \ +} +#define dcl15(T) \ +dcl14(T) \ +T C##T##15(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M,T N,T O) \ +{ \ + return (A-B-C-D-E-F-G-H-I-J-K-L-M-N-O); \ +} +#define dcl(T) dcl15(T) +dcl(_c) +dcl(_uc) +dcl(_s) +dcl(_us) +dcl(_i) +#if __WORDSIZE == 64 +dcl(_ui) +dcl(_l) +#endif +dcl(_f) +dcl(_d) +#undef dcl +#undef dcl15 +#undef dcl14 +#undef dcl13 +#undef dcl12 +#undef dcl11 +#undef dcl10 +#undef dcl9 +#undef dcl8 +#undef dcl7 +#undef dcl6 +#undef dcl5 +#undef dcl4 +#undef dcl3 +#undef dcl2 +#undef dcl1 +#undef dcl0 + +#define dcl0(T) \ +T CJ##T##0(void) \ +{ \ + return ((*j##T##0)()); \ +} +#define dcl1(T) \ +dcl0(T) \ +T CJ##T##1(void) \ +{ \ + return ((*j##T##1)(1)); \ +} +#define dcl2(T) \ +dcl1(T) \ +T CJ##T##2(void) \ +{ \ + return ((*j##T##2)(1,2)); \ +} +#define dcl3(T) \ +dcl2(T) \ +T CJ##T##3(void) \ +{ \ + return ((*j##T##3)(1,2,3)); \ +} +#define dcl4(T) \ +dcl3(T) \ +T CJ##T##4(void) \ +{ \ + return ((*j##T##4)(1,2,3,4)); \ +} +#define dcl5(T) \ +dcl4(T) \ +T CJ##T##5(void) \ +{ \ + return ((*j##T##5)(1,2,3,4,5)); \ +} +#define dcl6(T) \ +dcl5(T) \ +T CJ##T##6(void) \ +{ \ + return ((*j##T##6)(1,2,3,4,5,6)); \ +} +#define dcl7(T) \ +dcl6(T) \ +T CJ##T##7(void) \ +{ \ + return ((*j##T##7)(1,2,3,4,5,6,7)); \ +} +#define dcl8(T) \ +dcl7(T) \ +T CJ##T##8(void) \ +{ \ + return ((*j##T##8)(1,2,3,4,5,6,7,8)); \ +} +#define dcl9(T) \ +dcl8(T) \ +T CJ##T##9(void) \ +{ \ + return 
((*j##T##9)(1,2,3,4,5,6,7,8,9)); \ +} +#define dcl10(T) \ +dcl9(T) \ +T CJ##T##10(void) \ +{ \ + return ((*j##T##10)(1,2,3,4,5,6,7,8,9,10)); \ +} +#define dcl11(T) \ +dcl10(T) \ +T CJ##T##11(void) \ +{ \ + return ((*j##T##11)(1,2,3,4,5,6,7,8,9,10,11)); \ +} +#define dcl12(T) \ +dcl11(T) \ +T CJ##T##12(void) \ +{ \ + return ((*j##T##12)(1,2,3,4,5,6,7,8,9,10,11,12)); \ +} +#define dcl13(T) \ +dcl12(T) \ +T CJ##T##13(void) \ +{ \ + return ((*j##T##13)(1,2,3,4,5,6,7,8,9,10,11,12,13)); \ +} +#define dcl14(T) \ +dcl13(T) \ +T CJ##T##14(void) \ +{ \ + return ((*j##T##14)(1,2,3,4,5,6,7,8,9,10,11,12,13,14)); \ +} +#define dcl15(T) \ +dcl14(T) \ +T CJ##T##15(void) \ +{ \ + return ((*j##T##15)(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)); \ +} +#define dcl(t) dcl15(t) +dcl(_c) +dcl(_uc) +dcl(_s) +dcl(_us) +dcl(_i) +#if __WORDSIZE == 64 +dcl(_ui) +dcl(_l) +#endif +dcl(_f) +dcl(_d) +#undef dcl +#undef dcl15 +#undef dcl14 +#undef dcl13 +#undef dcl12 +#undef dcl11 +#undef dcl10 +#undef dcl9 +#undef dcl8 +#undef dcl7 +#undef dcl6 +#undef dcl5 +#undef dcl4 +#undef dcl3 +#undef dcl2 +#undef dcl1 +#undef dcl0 + +int +main(int argc, char *argv[]) +{ + jit_state_t *_jit; + jit_node_t *jmpi_main; + void (*function)(void); + jit_node_t *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8,*a9; + jit_node_t *a10,*a11,*a12,*a13,*a14,*a15; + jit_node_t *jmp; + + init_jit(); + _jit = jit_new_state(); + + jmpi_main = jit_jmpi(); + + +#define arg0(T) /**/ +#define arg1(T) a1 = jit_arg##T(); +#define arg2(T) arg1(T) a2 = jit_arg##T(); +#define arg3(T) arg2(T) a3 = jit_arg##T(); +#define arg4(T) arg3(T) a4 = jit_arg##T(); +#define arg5(T) arg4(T) a5 = jit_arg##T(); +#define arg6(T) arg5(T) a6 = jit_arg##T(); +#define arg7(T) arg6(T) a7 = jit_arg##T(); +#define arg8(T) arg7(T) a8 = jit_arg##T(); +#define arg9(T) arg8(T) a9 = jit_arg##T(); +#define arg10(T) arg9(T) a10 = jit_arg##T(); +#define arg11(T) arg10(T) a11 = jit_arg##T(); +#define arg12(T) arg11(T) a12 = jit_arg##T(); +#define arg13(T) arg12(T) a13 = jit_arg##T(); 
+#define arg14(T) arg13(T) a14 = jit_arg##T(); +#define arg15(T) arg14(T) a15 = jit_arg##T(); + +#define get0(B,T,R) jit_movi##B(R##0,0); +#define get1(B,T,R) jit_getarg##B(R##0,a##1); +#define get2(B,T,R) \ + get1(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##2); \ + jit_subr##B(R##0, R##1, R##0); +#define get3(B,T,R) \ + get2(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##3); \ + jit_subr##B(R##0, R##1, R##0); +#define get4(B,T,R) \ + get3(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##4); \ + jit_subr##B(R##0, R##1, R##0); +#define get5(B,T,R) \ + get4(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##5); \ + jit_subr##B(R##0, R##1, R##0); +#define get6(B,T,R) \ + get5(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##6); \ + jit_subr##B(R##0, R##1, R##0); +#define get7(B,T,R) \ + get6(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##7); \ + jit_subr##B(R##0, R##1, R##0); +#define get8(B,T,R) \ + get7(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##8); \ + jit_subr##B(R##0, R##1, R##0); +#define get9(B,T,R) \ + get8(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##9); \ + jit_subr##B(R##0, R##1, R##0); +#define get10(B,T,R) \ + get9(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##10); \ + jit_subr##B(R##0, R##1, R##0); +#define get11(B,T,R) \ + get10(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##11); \ + jit_subr##B(R##0, R##1, R##0); +#define get12(B,T,R) \ + get11(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##12); \ + jit_subr##B(R##0, R##1, R##0); +#define get13(B,T,R) \ + get12(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##13); \ + jit_subr##B(R##0, R##1, R##0); +#define get14(B,T,R) \ + get13(B,T,R); \ + jit_movr##B(R##1, R##0); \ + jit_getarg##T(R##0, a##14); \ + jit_subr##B(R##0, R##1, R##0); +#define get15(B,T,R) \ + get14(B,T,R); \ + jit_movr##B(R##1, R##0); \ 
+ jit_getarg##T(R##0, a##15); \ + jit_subr##B(R##0, R##1, R##0); + +#if __WORDSIZE == 32 +# define jit_extr_i(u, v) /**/ +#else +# define jit_extr_l(u, v) /**/ +#endif + +#define defi(T, N) \ + n##T##N = jit_note(NULL); \ + jit_prolog(); \ + arg##N(); \ + get##N(,T,JIT_R) \ + jit_extr##T(JIT_R0, JIT_R0); \ + jit_retr(JIT_R0); \ + jit_epilog(); +#define deff(T, N) \ + n##T##N = jit_note(NULL); \ + jit_prolog(); \ + arg##N(T); \ + get##N(T,T,JIT_F); \ + jit_retr##T(JIT_F0); \ + jit_epilog(); + +#define def0(X, T) def##X(T, 0) +#define def1(X, T) def0(X, T) def##X(T, 1) +#define def2(X, T) def1(X, T) def##X(T, 2) +#define def3(X, T) def2(X, T) def##X(T, 3) +#define def4(X, T) def3(X, T) def##X(T, 4) +#define def5(X, T) def4(X, T) def##X(T, 5) +#define def6(X, T) def5(X, T) def##X(T, 6) +#define def7(X, T) def6(X, T) def##X(T, 7) +#define def8(X, T) def7(X, T) def##X(T, 8) +#define def9(X, T) def8(X, T) def##X(T, 9) +#define def10(X, T) def9(X, T) def##X(T, 10) +#define def11(X, T) def10(X, T) def##X(T, 11) +#define def12(X, T) def11(X, T) def##X(T, 12) +#define def13(X, T) def12(X, T) def##X(T, 13) +#define def14(X, T) def13(X, T) def##X(T, 14) +#define def15(X, T) def14(X, T) def##X(T, 15) +#define def(T) def15(i, T) + def(_c) + def(_uc) + def(_s) + def(_us) + def(_i) +#if __WORDSIZE == 64 + def(_ui) + def(_l) +#endif +#undef def +#define def(T) def15(f, T) + def(_f) + def(_d) +#undef def + + jit_patch(jmpi_main); + jit_prolog(); + +#define push0(T) /**/ +#define push1(T) jit_pushargi##T(1); +#define push2(T) push1(T) jit_pushargi##T(2); +#define push3(T) push2(T) jit_pushargi##T(3); +#define push4(T) push3(T) jit_pushargi##T(4); +#define push5(T) push4(T) jit_pushargi##T(5); +#define push6(T) push5(T) jit_pushargi##T(6); +#define push7(T) push6(T) jit_pushargi##T(7); +#define push8(T) push7(T) jit_pushargi##T(8); +#define push9(T) push8(T) jit_pushargi##T(9); +#define push10(T) push9(T) jit_pushargi##T(10); +#define push11(T) push10(T) jit_pushargi##T(11); +#define 
push12(T) push11(T) jit_pushargi##T(12); +#define push13(T) push12(T) jit_pushargi##T(13); +#define push14(T) push13(T) jit_pushargi##T(14); +#define push15(T) push14(T) jit_pushargi##T(15); + +#define calin(T,N) \ + jit_prepare(); \ + push##N() \ + jit_finishi(C##T##N); \ + jit_retval##T(JIT_R0); \ + jmp = jit_beqi(JIT_R0, T##N); \ + jit_calli(abort); \ + jit_patch(jmp); +#define calfn(T,N) \ + jit_prepare(); \ + push##N(T) \ + jit_finishi(C##T##N); \ + jit_retval##T(JIT_F0); \ + jmp = jit_beqi##T(JIT_F0, _w##N); \ + jit_calli(abort); \ + jit_patch(jmp); +#define calx0(X,T) cal##X##n(T,0) +#define calx1(X,T) calx0(X,T) cal##X##n(T,1) +#define calx2(X,T) calx1(X,T) cal##X##n(T,2) +#define calx3(X,T) calx2(X,T) cal##X##n(T,3) +#define calx4(X,T) calx3(X,T) cal##X##n(T,4) +#define calx5(X,T) calx4(X,T) cal##X##n(T,5) +#define calx6(X,T) calx5(X,T) cal##X##n(T,6) +#define calx7(X,T) calx6(X,T) cal##X##n(T,7) +#define calx8(X,T) calx7(X,T) cal##X##n(T,8) +#define calx9(X,T) calx8(X,T) cal##X##n(T,9) +#define calx10(X,T) calx9(X,T) cal##X##n(T,10) +#define calx11(X,T) calx10(X,T) cal##X##n(T,11) +#define calx12(X,T) calx11(X,T) cal##X##n(T,12) +#define calx13(X,T) calx12(X,T) cal##X##n(T,13) +#define calx14(X,T) calx13(X,T) cal##X##n(T,14) +#define calx15(X,T) calx14(X,T) cal##X##n(T,15) +#define cali(T) calx15(i,T) +#define calf(T) calx15(f,T) + + cali(_c) + cali(_uc) + cali(_s) + cali(_us) + cali(_i) +#if __WORDSIZE == 64 + cali(_ui) + cali(_l) +#endif + calf(_f) + calf(_d) + +#undef calin +#undef calfn +#define calin(T,N) \ + jit_prepare(); \ + push##N() \ + jit_finishi(CJ##T##N); \ + jit_retval##T(JIT_R0); \ + jmp = jit_beqi(JIT_R0, T##N); \ + jit_calli(abort); \ + jit_patch(jmp); +#define calfn(T,N) \ + jit_prepare(); \ + push##N(T) \ + jit_finishi(CJ##T##N); \ + jit_retval##T(JIT_F0); \ + jmp = jit_beqi##T(JIT_F0, _w##N); \ + jit_calli(abort); \ + jit_patch(jmp); + cali(_c) + cali(_uc) + cali(_s) + cali(_us) + cali(_i) +#if __WORDSIZE == 64 + cali(_ui) + cali(_l) 
+#endif + calf(_f) + calf(_d) + + jit_ret(); + + function = jit_emit(); + +#define initn(T,N) j##T##N = jit_address(n##T##N); +#define init0(T) initn(T,0) +#define init1(T) init0(T) initn(T,1) +#define init2(T) init1(T) initn(T,2) +#define init3(T) init2(T) initn(T,3) +#define init4(T) init3(T) initn(T,4) +#define init5(T) init4(T) initn(T,5) +#define init6(T) init5(T) initn(T,6) +#define init7(T) init6(T) initn(T,7) +#define init8(T) init7(T) initn(T,8) +#define init9(T) init8(T) initn(T,9) +#define init10(T) init9(T) initn(T,10) +#define init11(T) init10(T) initn(T,11) +#define init12(T) init11(T) initn(T,12) +#define init13(T) init12(T) initn(T,13) +#define init14(T) init13(T) initn(T,14) +#define init15(T) init14(T) initn(T,15) +#define init(T) init15(T) + init(_c) + init(_uc) + init(_s) + init(_us) + init(_i) +#if __WORDSIZE == 64 + init(_ui) + init(_l) +#endif + init(_f) + init(_d) + +#if 0 + jit_print(); + jit_disassemble(); +#endif + (*function)(); + finish_jit(); + + printf("ok\n"); + + return (0); +} diff --git a/check/ccall.ok b/check/ccall.ok new file mode 100644 index 0000000..9766475 --- /dev/null +++ b/check/ccall.ok @@ -0,0 +1 @@ +ok diff --git a/include/lightning.h b/include/lightning.h index e992c6d..298ef18 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -728,6 +728,8 @@ extern void finish_jit(void); extern jit_state_t *jit_new_state(void); +#define jit_address(node) _jit_address(_jit, node) +extern jit_pointer_t _jit_address(jit_state_t*, jit_node_t*); extern jit_node_t *_jit_data(jit_state_t*, jit_pointer_t, jit_word_t); extern jit_node_t *_jit_note(jit_state_t*, jit_pointer_t); diff --git a/include/lightning/jit_ppc.h b/include/lightning/jit_ppc.h index ee07648..dcdc8f1 100644 --- a/include/lightning/jit_ppc.h +++ b/include/lightning/jit_ppc.h @@ -30,8 +30,8 @@ typedef enum { #define jit_r_num() 3 #define jit_v(i) (_R30 - (i)) #define jit_v_num() 17 -#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) -#define jit_f(i) (_F0 + (i)) 
+#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 13) +#define jit_f(i) (_F14 + (i)) #define jit_f_num() 6 _R0, #define JIT_R0 _R11 @@ -73,13 +73,7 @@ typedef enum { #define JIT_RA6 _R9 #define JIT_RA7 _R10 _R10, _R9, _R8, _R7, _R6, _R5, _R4, _R3, -# define JIT_F0 _F0 -# define JIT_F1 _F9 -# define JIT_F2 _F10 -# define JIT_F3 _F11 -# define JIT_F4 _F12 -# define JIT_F5 _F13 - _F0, _F9, _F10, _F11, _F12, _F13, + _F0, #define JIT_FS0 _F14 #define JIT_FS1 _F15 #define JIT_FS2 _F16 @@ -98,9 +92,17 @@ typedef enum { #define JIT_FS15 _F29 #define JIT_FS16 _F30 #define JIT_FS17 _F31 - _F14, _F15, _F16, _F17, _F18, _F19, _F20, - _F21, _F22, _F23, _F24, _F25, _F26, _F27, - _F28, _F29, _F30, _F31, + _F14, _F15, _F16, _F17, _F18, _F19, _F20, _F21, +#define JIT_F0 _F14 +#define JIT_F1 _F15 +#define JIT_F2 _F16 +#define JIT_F3 _F17 +#define JIT_F4 _F18 +#define JIT_F5 _F19 + /* FIXME _F20-_F31 not (easily) accesible and only _F14-_F21 + * saved/restored (if used) */ + _F22, _F23, _F24, _F25, _F26, _F27, _F28, _F29, + _F30, _F31, #define JIT_FRET _F1 #define JIT_FA0 _F1 #define JIT_FA1 _F2 @@ -110,7 +112,13 @@ typedef enum { #define JIT_FA5 _F6 #define JIT_FA6 _F7 #define JIT_FA7 _F8 - _F8, _F7, _F6, _F5, _F4, _F3, _F2, _F1, +#define JIT_FA8 _F9 +#define JIT_FA9 _F10 +#define JIT_FA10 _F11 +#define JIT_FA11 _F12 +#define JIT_FA12 _F13 + _F13, _F12, _F11, _F10, _F9, _F8, _F7, _F6, + _F5, _F4, _F3, _F2, _F1, _NOREG, #define JIT_NOREG _NOREG } jit_reg_t; diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 959af0f..ca6f970 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -222,6 +222,7 @@ struct jit_state { } pc; jit_node_t *head; jit_node_t *tail; + jit_uint32_t done : 1; /* emit state finished */ jit_uint32_t emit : 1; /* emit state entered */ jit_uint32_t again : 1; /* start over emiting function */ jit_uint32_t prepare : 1; /* inside prepare/finish* block */ diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 
ed933a2..eb96a46 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1480,6 +1480,7 @@ _jit_emit(jit_state_t *_jit) } __clear_cache(_jit->code.ptr, _jit->pc.uc); + _jit->done = 1; return (_jit->code.ptr); } diff --git a/lib/jit_mips.c b/lib/jit_mips.c index 5f85f19..aba1a05 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -235,13 +235,9 @@ _jit_arg(jit_state_t *_jit) jit_int32_t offset; assert(_jit->function); - if (_jit->function->self.argf) { - _jit->function->self.argi = _jit->function->self.argf; - _jit->function->self.argf = 0; - } - if (_jit->function->self.argi < 4) - offset = _jit->function->self.argi++; - else + offset = (_jit->function->self.size - stack_framesize) >> 2; + _jit->function->self.argi = 1; + if (offset >= 4) offset = _jit->function->self.size; _jit->function->self.size += sizeof(jit_word_t); return (jit_new_node_w(jit_code_arg, offset)); @@ -260,24 +256,20 @@ _jit_arg_f(jit_state_t *_jit) assert(_jit->function); offset = (_jit->function->self.size - stack_framesize) >> 2; - if (offset < 3) { + if (offset < 4) { if (!_jit->function->self.argi) { - offset += 4; - _jit->function->self.argf += 2; - assert(!(offset & 1)); - } - else { - _jit->function->self.argi += 2; - if (offset & 1) { - ++_jit->function->self.argi; - ++offset; - _jit->function->self.size += sizeof(jit_float32_t); + if (offset == 0) + offset = 4; + else { + offset = 6; + _jit->function->self.argi = 1; } } } else offset = _jit->function->self.size; _jit->function->self.size += sizeof(jit_float32_t); + return (jit_new_node_w(jit_code_arg_f, offset)); } @@ -296,19 +288,12 @@ _jit_arg_d(jit_state_t *_jit) assert(_jit->function); if (_jit->function->self.size & 7) { _jit->function->self.size += 4; - if (_jit->function->self.size < 16 && !_jit->function->self.argi) { - _jit->function->self.argi = _jit->function->self.argf; - _jit->function->self.argf = 0; - } + _jit->function->self.argi = 1; } offset = (_jit->function->self.size - stack_framesize) >> 2; if (offset < 4) { - if 
(!_jit->function->self.argi) { + if (!_jit->function->self.argi) offset += 4; - _jit->function->self.argf += 2; - } - else - _jit->function->self.argi += 2; } else offset = _jit->function->self.size; @@ -421,13 +406,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) assert(_jit->function); offset = _jit->function->call.size >> 2; - if (_jit->function->call.argf) - _jit->function->call.argf = 0; + _jit->function->call.argi = 1; if (offset < 4) jit_movr(_A0 - offset, u); else jit_stxi(_jit->function->call.size, JIT_SP, u); - _jit->function->call.argi = offset + 1; _jit->function->call.size += sizeof(jit_word_t); } @@ -439,8 +422,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) assert(_jit->function); offset = _jit->function->call.size >> 2; - if (_jit->function->call.argf) - _jit->function->call.argf = 0; + ++_jit->function->call.argi; if (offset < 4) jit_movi(_A0 - offset, u); else { @@ -449,7 +431,6 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) jit_stxi(_jit->function->call.size, JIT_SP, regno); jit_unget_reg(regno); } - _jit->function->call.argi = offset + 1; _jit->function->call.size += sizeof(jit_word_t); } @@ -460,26 +441,16 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) assert(_jit->function); offset = _jit->function->call.size >> 2; - if (offset < 3) { - if (offset & 1) { - ++offset; - _jit->function->call.size += 4; - } - if (_jit->function->call.argi) - jit_new_node_ww(jit_code_pushargr_f, _A0 - offset, u); - else - jit_movr_f(_F12 - (offset >> 1), u); + if (offset < 2 && !_jit->function->call.argi) { + ++_jit->function->call.argf; + jit_movr_f(_F12 - offset, u); } - else - jit_stxi_f(_jit->function->call.size, JIT_SP, u); - if (offset < 3) { - if (!_jit->function->call.argi) - _jit->function->call.argf = offset + 2; - else - _jit->function->call.argi = offset + 2; + else if (offset < 4) { + ++_jit->function->call.argi; + jit_new_node_ww(jit_code_pushargr_f, _A0 - offset, u); } else - _jit->function->call.argi = offset + 1; + 
jit_stxi_f(_jit->function->call.size, JIT_SP, u); _jit->function->call.size += sizeof(jit_float32_t); } @@ -491,55 +462,48 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) assert(_jit->function); offset = _jit->function->call.size >> 2; - if (offset < 3) { - if (offset & 1) { - ++offset; - _jit->function->call.size += 4; - } - if (_jit->function->call.argi) - jit_new_node_ww(jit_code_pushargi_f, _A0 - offset, u); - else - jit_movi_f(_F12 - (offset >> 1), u); + if (offset < 2 && !_jit->function->call.argi) { + ++_jit->function->call.argf; + jit_movi_f(_F12 - offset, u); + } + else if (offset < 4) { + ++_jit->function->call.argi; + jit_new_node_wf(jit_code_pushargi_f, _A0 - offset, u); } else { - assert(_jit->function); regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); jit_stxi_f(_jit->function->call.size, JIT_SP, regno); jit_unget_reg(regno); } - if (offset < 3) { - if (!_jit->function->call.argi) - _jit->function->call.argf = offset + 2; - else - _jit->function->call.argi = offset + 2; - } - else - _jit->function->call.argi = offset + 1; _jit->function->call.size += sizeof(jit_float32_t); } void _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) { + jit_bool_t adjust; jit_word_t offset; assert(_jit->function); - if (_jit->function->call.size & 7) + adjust = !!_jit->function->call.argi; + if (_jit->function->call.size & 7) { _jit->function->call.size += 4; + adjust = 1; + } offset = _jit->function->call.size >> 2; if (offset < 3) { - if (_jit->function->call.argi) + if (adjust) { jit_new_node_ww(jit_code_pushargr_d, _A0 - offset, u); - else + _jit->function->call.argi += 2; + } + else { jit_movr_d(_F12 - (offset >> 1), u); + ++_jit->function->call.argf; + } } else jit_stxi_d(_jit->function->call.size, JIT_SP, u); - if (offset < 3 && !_jit->function->call.argi) - _jit->function->call.argf = offset + 2; - else - _jit->function->call.argi = offset + 2; _jit->function->call.size += sizeof(jit_float64_t); } @@ -547,17 +511,25 @@ void 
_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) { jit_int32_t regno; + jit_bool_t adjust; jit_word_t offset; assert(_jit->function); - if (_jit->function->call.size & 7) + adjust = !!_jit->function->call.argi; + if (_jit->function->call.size & 7) { _jit->function->call.size += 4; + adjust = 1; + } offset = _jit->function->call.size >> 2; if (offset < 3) { - if (_jit->function->call.argi) - jit_new_node_ww(jit_code_pushargi_d, _A0 - offset, u); - else + if (adjust) { + jit_new_node_wd(jit_code_pushargi_d, _A0 - offset, u); + _jit->function->call.argi += 2; + } + else { jit_movi_d(_F12 - (offset >> 1), u); + ++_jit->function->call.argf; + } } else { regno = jit_get_reg(jit_class_fpr); @@ -565,10 +537,6 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_stxi_d(_jit->function->call.size, JIT_SP, regno); jit_unget_reg(regno); } - if (offset < 3 && !_jit->function->call.argi) - _jit->function->call.argf = offset + 2; - else - _jit->function->call.argi = offset + 2; _jit->function->call.size += sizeof(jit_float64_t); } @@ -586,7 +554,7 @@ _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) } else if (spec & jit_class_fpr) { regno = _F12 - regno; - if (regno >= 0 && regno < node->v.w) + if (regno >= 0 && regno < node->w.w) return (1); } } @@ -1276,6 +1244,7 @@ _jit_emit(jit_state_t *_jit) #if defined(__linux__) _flush_cache((char *)_jit->code.ptr, _jit->pc.uc - _jit->code.ptr, ICACHE); #endif + _jit->done = 1; return (_jit->code.ptr); } diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index c0c6c70..7f8a835 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -17,10 +17,9 @@ #if PROTO # define gpr_save_area 72 /* r14~r31 = 18 * 4 */ -# define fpr_save_area 0 /* FIXME extra fpr registers - * not used */ +# define fpr_save_area 64 # define alloca_offset -(gpr_save_area + fpr_save_area) -# define params_offset 56 +# define params_offset 24 # define ii(i) *_jit->pc.ui++ = i # define can_sign_extend_short_p(im) ((im) >= -32768 && (im) <= 
32767) # define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535) @@ -2371,8 +2370,10 @@ static void _prolog(jit_state_t *_jit, jit_node_t *node) { unsigned long regno; + jit_word_t offset; - _jit->function->stack = ((_jit->function->self.alen + params_offset - + _jit->function->stack = ((_jit->function->self.alen + + _jit->function->self.size - _jit->function->self.aoff) + 15) & -16; /* return address */ @@ -2382,7 +2383,11 @@ _prolog(jit_state_t *_jit, jit_node_t *node) regno = jit_regset_scan1(_jit->function->regset, _R14); if (regno == ULONG_MAX || regno > _R31) regno = _R31; /* aka _FP_REGNO */ - STMW(rn(regno), _SP_REGNO, -(32 * 4) + rn(regno) * 4); + STMW(rn(regno), _SP_REGNO, -fpr_save_area - (32 * 4) + rn(regno) * 4); + for (offset = 0; offset < 8; offset++) { + if (jit_regset_tstbit(_jit->function->regset, _F14 + offset)) + stxi_d(-fpr_save_area + offset * 8, _SP_REGNO, rn(_F14 + offset)); + } stxi(8, _SP_REGNO, _R0_REGNO); @@ -2397,6 +2402,7 @@ static void _epilog(jit_state_t *_jit, jit_node_t *node) { unsigned long regno; + jit_word_t offset; LWZ(_SP_REGNO, _SP_REGNO, 0); ldxi(_R0_REGNO, _SP_REGNO, 8); @@ -2406,7 +2412,11 @@ _epilog(jit_state_t *_jit, jit_node_t *node) regno = jit_regset_scan1(_jit->function->regset, _R14); if (regno == ULONG_MAX || regno > _R31) regno = _R31; /* aka _FP_REGNO */ - LMW(rn(regno), _SP_REGNO, -(32 * 4) + rn(regno) * 4); + LMW(rn(regno), _SP_REGNO, -fpr_save_area - (32 * 4) + rn(regno) * 4); + for (offset = 0; offset < 8; offset++) { + if (jit_regset_tstbit(_jit->function->regset, _F14 + offset)) + ldxi_d(rn(_F14 + offset), _SP_REGNO, -fpr_save_area + offset * 8); + } BLR(); } diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index db0529c..c4a507e 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -69,11 +69,6 @@ jit_register_t _rvs[] = { { rc(arg) | rc(gpr) | 4, "r4" }, { rc(arg) | rc(gpr) | 3, "r3" }, { rc(fpr) | 0, "f0" }, - { rc(fpr) | 9, "f9" }, - { rc(fpr) | 10, "f10" }, - { rc(fpr) | 11, "f11" }, - { rc(fpr) | 12, 
"f12" }, - { rc(fpr) | 13, "f13" }, { rc(sav) | rc(fpr) | 14, "f14" }, { rc(sav) | rc(fpr) | 15, "f15" }, { rc(sav) | rc(fpr) | 16, "f16" }, @@ -92,6 +87,11 @@ jit_register_t _rvs[] = { { rc(sav) | rc(fpr) | 29, "f29" }, { rc(sav) | rc(fpr) | 30, "f30" }, { rc(sav) | rc(fpr) | 31, "f31" }, + { rc(arg) | rc(fpr) | 13, "f13" }, + { rc(arg) | rc(fpr) | 12, "f12" }, + { rc(arg) | rc(fpr) | 11, "f11" }, + { rc(arg) | rc(fpr) | 10, "f10" }, + { rc(arg) | rc(fpr) | 9, "f9" }, { rc(arg) | rc(fpr) | 8, "f8" }, { rc(arg) | rc(fpr) | 7, "f7" }, { rc(arg) | rc(fpr) | 6, "f6" }, @@ -242,10 +242,9 @@ _jit_arg(jit_state_t *_jit) assert(_jit->function); if (_jit->function->self.argi < 8) offset = _jit->function->self.argi++; - else { + else offset = _jit->function->self.size; - _jit->function->self.size += sizeof(jit_word_t); - } + _jit->function->self.size += sizeof(jit_word_t); return (jit_new_node_w(jit_code_arg, offset)); } @@ -260,12 +259,11 @@ _jit_arg_f(jit_state_t *_jit) { jit_int32_t offset; assert(_jit->function); - if (_jit->function->self.argf < 8) + if (_jit->function->self.argf < 13) offset = _jit->function->self.argf++; - else { + else offset = _jit->function->self.size; - _jit->function->self.size += sizeof(jit_float64_t); - } + _jit->function->self.size += sizeof(jit_float32_t); return (jit_new_node_w(jit_code_arg_f, offset)); } @@ -280,19 +278,18 @@ _jit_arg_d(jit_state_t *_jit) { jit_int32_t offset; assert(_jit->function); - if (_jit->function->self.argf < 8) + if (_jit->function->self.argf < 13) offset = _jit->function->self.argf++; - else { + else offset = _jit->function->self.size; - _jit->function->self.size += sizeof(jit_float64_t); - } + _jit->function->self.size += sizeof(jit_float64_t); return (jit_new_node_w(jit_code_arg_d, offset)); } jit_bool_t _jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset) { - return (offset >= 0 && offset < 8); + return (offset >= 0 && offset < 13); } void @@ -300,8 +297,14 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, 
jit_node_t *v) { if (v->u.w < 8) jit_extr_c(u, JIT_RA0 - v->u.w); - else + else { +#if __BYTE_ORDER == __LITTLE__ENDIAN jit_ldxi_c(u, JIT_FP, v->u.w); +#else + jit_ldxi_c(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int8_t)); +#endif + } } void @@ -309,8 +312,14 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { if (v->u.w < 8) jit_extr_uc(u, JIT_RA0 - v->u.w); - else + else { +#if __BYTE_ORDER == __LITTLE__ENDIAN jit_ldxi_uc(u, JIT_FP, v->u.w); +#else + jit_ldxi_uc(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_uint8_t)); +#endif + } } void @@ -318,8 +327,14 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { if (v->u.w < 8) jit_extr_s(u, JIT_RA0 - v->u.w); - else + else { +#if __BYTE_ORDER == __LITTLE__ENDIAN jit_ldxi_s(u, JIT_FP, v->u.w); +#else + jit_ldxi_s(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int16_t)); +#endif + } } void @@ -327,8 +342,14 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { if (v->u.w < 8) jit_extr_us(u, JIT_RA0 - v->u.w); - else + else { +#if __BYTE_ORDER == __LITTLE__ENDIAN jit_ldxi_us(u, JIT_FP, v->u.w); +#else + jit_ldxi_us(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_uint16_t)); +#endif + } } void @@ -336,8 +357,14 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { if (v->u.w < 8) jit_movr(u, JIT_RA0 - v->u.w); - else + else { +#if __BYTE_ORDER == __LITTLE__ENDIAN jit_ldxi_i(u, JIT_FP, v->u.w); +#else + jit_ldxi_i(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t)); +#endif + } } #if __WORDSIZE == 64 @@ -346,8 +373,14 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { if (v->u.w < 8) jit_extr_ui(u, JIT_RA0 - v->u.w); - else + else { +# if __BYTE_ORDER == __LITTLE__ENDIAN jit_ldxi_ui(u, JIT_FP, v->u.w); +# else + jit_ldxi_ui(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_uint32_t)); +# endif + } } void @@ -363,13 +396,16 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void 
_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - jit_getarg_d(u, v); + if (v->u.w < 13) + jit_movr_d(u, JIT_FA0 - v->u.w); + else + jit_ldxi_f(u, JIT_FP, v->u.w); } void _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - if (v->u.w < 8) + if (v->u.w < 13) jit_movr_d(u, JIT_FA0 - v->u.w); else jit_ldxi_d(u, JIT_FP, v->u.w); @@ -383,10 +419,9 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) jit_movr(JIT_RA0 - _jit->function->call.argi, u); ++_jit->function->call.argi; } - else { + else jit_stxi(_jit->function->call.size + params_offset, JIT_SP, u); - _jit->function->call.size += sizeof(jit_word_t); - } + _jit->function->call.size += sizeof(jit_word_t); } void @@ -402,33 +437,88 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); jit_stxi(_jit->function->call.size + params_offset, JIT_SP, regno); - _jit->function->call.size += sizeof(jit_word_t); jit_unget_reg(regno); } + _jit->function->call.size += sizeof(jit_word_t); } void _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) { - jit_pushargr_d(u); + assert(_jit->function); + if (_jit->function->call.argf < 13) { + jit_movr_d(JIT_FA0 - _jit->function->call.argf, u); + ++_jit->function->call.argf; + if (!(_jit->function->call.call & jit_call_varargs)) { + /* in case of excess arguments */ + if (_jit->function->call.argi < 8) + _jit->function->call.argi += 2; + _jit->function->call.size += sizeof(jit_float32_t); + return; + } + } + if (_jit->function->call.argi < 8) { + /* use reserved 8 bytes area */ + jit_stxi_d(alloca_offset - 8, JIT_FP, u); + jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, + alloca_offset - 8); + _jit->function->call.argi++; + jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, + alloca_offset - 4); + _jit->function->call.argi++; + } + else + jit_stxi_f(_jit->function->call.size + params_offset, JIT_SP, u); + _jit->function->call.size += sizeof(jit_float32_t); } void _jit_pushargi_f(jit_state_t *_jit, 
jit_float32_t u) { - jit_pushargi_d(u); + jit_int32_t regno; + + assert(_jit->function); + if (_jit->function->call.argf < 13) { + jit_movi_d(JIT_FA0 - _jit->function->call.argf, u); + ++_jit->function->call.argf; + if (!(_jit->function->call.call & jit_call_varargs)) { + /* in case of excess arguments */ + if (_jit->function->call.argi < 8) + _jit->function->call.argi += 2; + _jit->function->call.size += sizeof(jit_float32_t); + return; + } + } + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + if (_jit->function->call.argi < 8) { + /* use reserved 8 bytes area */ + jit_stxi_d(alloca_offset - 8, JIT_FP, regno); + jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, + alloca_offset - 8); + _jit->function->call.argi++; + jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, + alloca_offset - 4); + _jit->function->call.argi++; + } + else + jit_stxi_f(_jit->function->call.size + params_offset, JIT_SP, regno); + _jit->function->call.size += sizeof(jit_float32_t); + jit_unget_reg(regno); } void _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) { assert(_jit->function); - if (_jit->function->call.argf < 8) { + if (_jit->function->call.argf < 13) { jit_movr_d(JIT_FA0 - _jit->function->call.argf, u); ++_jit->function->call.argf; if (!(_jit->function->call.call & jit_call_varargs)) { /* in case of excess arguments */ - ++_jit->function->call.argi; + if (_jit->function->call.argi < 8) + _jit->function->call.argi += 2; + _jit->function->call.size += sizeof(jit_float64_t); return; } } @@ -442,10 +532,9 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) alloca_offset - 4); _jit->function->call.argi++; } - else { + else jit_stxi_d(_jit->function->call.size + params_offset, JIT_SP, u); - _jit->function->call.size += sizeof(jit_float64_t); - } + _jit->function->call.size += sizeof(jit_float64_t); } void @@ -454,12 +543,14 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_int32_t regno; assert(_jit->function); - if (_jit->function->call.argf < 8) { + if 
(_jit->function->call.argf < 13) { jit_movi_d(JIT_FA0 - _jit->function->call.argf, u); ++_jit->function->call.argf; if (!(_jit->function->call.call & jit_call_varargs)) { /* in case of excess arguments */ - ++_jit->function->call.argi; + if (_jit->function->call.argi < 8) + _jit->function->call.argi += 2; + _jit->function->call.size += sizeof(jit_float64_t); return; } } @@ -475,10 +566,9 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) alloca_offset - 4); _jit->function->call.argi++; } - else { + else jit_stxi_d(_jit->function->call.size + params_offset, JIT_SP, regno); - _jit->function->call.size += sizeof(jit_float64_t); - } + _jit->function->call.size += sizeof(jit_float64_t); jit_unget_reg(regno); } @@ -1122,6 +1212,7 @@ _jit_emit(jit_state_t *_jit) } __clear_cache(_jit->code.ptr, _jit->pc.uc); + _jit->done = 1; return (_jit->code.ptr); } diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 6b14808..f795770 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -772,14 +772,18 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) if (_jit->function->self.alen < _jit->function->call.size) _jit->function->self.alen = _jit->function->call.size; #if __WORDSIZE == 64 - if (_jit->function->call.call & jit_call_varargs) - jit_regset_setbit(_jit->regarg, _RAX); + /* FIXME preventing %rax allocation is good enough, but for consistency + * it should automatically detect %rax is dead, in case it has run out + * registers, and not save/restore it, what would be wrong if using the + * the return value, otherwise, just a needless noop */ + /* >> prevent %rax from being allocated as the function pointer */ + jit_regset_setbit(_jit->regarg, _RAX); reg = jit_get_reg(jit_class_gpr); node = jit_movi(reg, (jit_word_t)i0); jit_finishr(reg); jit_unget_reg(reg); - if (_jit->function->call.call & jit_call_varargs) - jit_regset_clrbit(_jit->regarg, _RAX); + /* << prevent %rax from being allocated as the function pointer */ + jit_regset_clrbit(_jit->regarg, _RAX); #else node = 
jit_calli(i0); node->v.w = _jit->function->call.argi; @@ -1625,6 +1629,7 @@ _jit_emit(jit_state_t *_jit) word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; patch_at(node, _jit->patches.ptr[offset].inst, word); } + _jit->done = 1; return (_jit->code.ptr); } diff --git a/lib/lightning.c b/lib/lightning.c index c0d4411..8050251 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -311,6 +311,14 @@ hash_data(jit_pointer_t data, jit_word_t length) return (key); } +jit_pointer_t +_jit_address(jit_state_t *_jit, jit_node_t *node) +{ + assert(_jit->done); + assert(node && node->code == jit_code_note); + return ((jit_pointer_t)node->u.w); +} + jit_node_t * _jit_data(jit_state_t *_jit, jit_pointer_t data, jit_word_t length) { @@ -760,8 +768,7 @@ _jit_patch(jit_state_t* _jit, jit_node_t *instr) { jit_node_t *label; - if (!(label = _jit->tail) || - (label->code != jit_code_label && label->code != jit_code_epilog)) + if (!(label = _jit->tail) || label->code != jit_code_label) label = jit_label(); jit_patch_at(instr, label); } @@ -1347,6 +1354,48 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, jit_regset_setbit(*live, value); } break; +#if __arm__ + /* This is not trivial to generalize, so, any backend + * that needs to pass double as arguments or receive + * double results in an integer register pair should + * need such special case(s). 
+ */ + case jit_code_movi_d: + if (!(node->u.w & jit_regno_patch)) { + spec = jit_class(_rvs[node->u.w].spec); + if (spec & jit_class_gpr) { + /* must be a reti_d or pushargi_d */ + jit_regset_clrbit(*mask, node->u.w + 1); + jit_regset_setbit(*live, node->u.w + 1); + jit_regset_clrbit(*mask, node->u.w); + jit_regset_setbit(*live, node->u.w); + } + else + goto fallback; + } + break; + case jit_code_movr_d: + if (!(node->u.w & jit_regno_patch)) { + spec = jit_class(_rvs[jit_regno(node->u.w)].spec); + if (spec & jit_class_gpr) { + /* must be a retr_d */ + jit_regset_clrbit(*mask, node->u.w + 1); + jit_regset_setbit(*live, node->u.w + 1); + jit_regset_clrbit(*mask, node->u.w); + jit_regset_setbit(*live, node->u.w); + } + else + goto fallback; + } + if (!(node->v.w & jit_regno_patch)) { + if (jit_regset_tstbit(*mask, node->v.w)) { + jit_regset_clrbit(*mask, node->v.w); + jit_regset_setbit(*live, node->v.w); + } + } + break; + fallback: +#endif default: value = jit_classify(node->code); if (value & jit_cc_a2_reg) { |