-rw-r--r-- | check/Makefile.am | 9 | ||||
-rw-r--r-- | check/lightning.c | 3 | ||||
-rw-r--r-- | check/movzr.ok | 1 | ||||
-rw-r--r-- | check/movzr.tst | 62 | ||||
-rw-r--r-- | doc/body.texi | 2 | ||||
-rw-r--r-- | include/lightning.h.in | 4 | ||||
-rw-r--r-- | lib/jit_aarch64-cpu.c | 20 | ||||
-rw-r--r-- | lib/jit_aarch64-sz.c | 2 | ||||
-rw-r--r-- | lib/jit_aarch64.c | 2 | ||||
-rw-r--r-- | lib/jit_arm-cpu.c | 33 | ||||
-rw-r--r-- | lib/jit_arm-sz.c | 2 | ||||
-rw-r--r-- | lib/jit_arm.c | 2 | ||||
-rw-r--r-- | lib/jit_mips-cpu.c | 3 | ||||
-rw-r--r-- | lib/jit_mips-sz.c | 2 | ||||
-rw-r--r-- | lib/jit_mips.c | 2 | ||||
-rw-r--r-- | lib/jit_ppc-cpu.c | 20 | ||||
-rw-r--r-- | lib/jit_ppc-sz.c | 8 | ||||
-rw-r--r-- | lib/jit_ppc.c | 2 | ||||
-rw-r--r-- | lib/jit_x86-cpu.c | 31 | ||||
-rw-r--r-- | lib/jit_x86-sz.c | 8 | ||||
-rw-r--r-- | lib/jit_x86.c | 2 | ||||
-rw-r--r-- | lib/lightning.c | 1 |
diff --git a/check/Makefile.am b/check/Makefile.am index 583bb12..f1155d7 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -82,6 +82,7 @@ EXTRA_DIST = \ alu_rsh.tst alu_rsh.ok \ alu_com.tst alu_com.ok \ alu_neg.tst alu_neg.ok \ + movzr.tst movzr.ok \ fop_abs.tst fop_abs.ok \ fop_sqrt.tst fop_sqrt.ok \ varargs.tst varargs.ok \ @@ -123,6 +124,7 @@ base_TESTS = \ alu_and alu_or alu_xor \ alu_lsh alu_rsh \ alu_com alu_neg \ + movzr \ fop_abs fop_sqrt \ varargs stack \ clobber carry call \ @@ -151,6 +153,7 @@ x87_TESTS = \ alu_and.x87 alu_or.x87 alu_xor.x87 \ alu_lsh.x87 alu_rsh.x87 \ alu_com.x87 alu_neg.x87 \ + movzr.x87 \ fop_abs.x87 fop_sqrt.x87 \ varargs.x87 stack.x87 \ clobber.x87 carry.x87 call.x87 \ @@ -174,6 +177,7 @@ x87_nodata_TESTS = \ alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata \ alu_lsh.x87.nodata alu_rsh.x87.nodata \ alu_com.x87.nodata alu_neg.x87.nodata \ + movzr.x87.nodata \ fop_abs.x87.nodata fop_sqrt.x87.nodata \ varargs.x87.nodata stack.x87.nodata \ clobber.x87.nodata carry.x87.nodata call.x87.nodata \ @@ -199,6 +203,7 @@ arm_TESTS = \ alu_and.arm alu_or.arm alu_xor.arm \ alu_lsh.arm alu_rsh.arm \ alu_com.arm alu_neg.arm \ + movzr.arm \ fop_abs.arm fop_sqrt.arm \ varargs.arm stack.arm \ clobber.arm carry.arm call.arm \ @@ -224,6 +229,7 @@ swf_TESTS = \ alu_and.swf alu_or.swf alu_xor.swf \ alu_lsh.swf alu_rsh.swf \ alu_com.swf alu_neg.swf \ + movzr.swf \ fop_abs.swf fop_sqrt.swf \ varargs.swf stack.swf \ clobber.swf carry.swf call.swf \ @@ -247,6 +253,7 @@ arm_swf_TESTS = \ alu_and.arm.swf alu_or.arm.swf alu_xor.arm.swf \ alu_lsh.arm.swf alu_rsh.arm.swf \ alu_com.arm.swf alu_neg.arm.swf \ + movzr.arm.swf \ fop_abs.arm.swf fop_sqrt.arm.swf \ varargs.arm.swf stack.arm.swf \ clobber.arm.swf carry.arm.swf call.arm.swf \ @@ -271,6 +278,7 @@ arm4_swf_TESTS = \ alu_and.arm4.swf alu_or.arm4.swf alu_xor.arm4.swf \ alu_lsh.arm4.swf alu_rsh.arm4.swf \ alu_com.arm4.swf alu_neg.arm4.swf \ + movzr.arm4.swf \ fop_abs.arm4.swf fop_sqrt.arm4.swf \ varargs.arm4.swf stack.arm4.swf \ clobber.arm4.swf carry.arm4.swf call.arm4.swf \ @@ -298,6 +306,7 @@ nodata_TESTS = \ alu_and.nodata alu_or.nodata alu_xor.nodata \ alu_lsh.nodata alu_rsh.nodata \ alu_com.nodata alu_neg.nodata \ + movzr.nodata \ fop_abs.nodata fop_sqrt.nodata \ varargs.nodata stack.nodata \ clobber.nodata carry.nodata call.nodata \ diff --git a/check/lightning.c b/check/lightning.c index 4ce30d5..9798a5b 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -328,6 +328,7 @@ static void htonr_ui(void); static void ntohr_ui(void); static void htonr_ul(void); static void ntohr_ul(void); #endif static void htonr(void); static void ntohr(void); +static void movnr(void); static void movzr(void); static void ldr_c(void); static void ldi_c(void); static void ldr_uc(void); static void ldi_uc(void); static void ldr_s(void); static void ldi_s(void); @@ -642,6 +643,7 @@ static instr_t instr_vector[] = { entry(htonr_ul), entry(ntohr_ul), #endif entry(htonr), entry(ntohr), + entry(movnr), entry(movzr), entry(ldr_c), entry(ldi_c), entry(ldr_uc), entry(ldi_uc), entry(ldr_s), entry(ldi_s), @@ -1490,6 +1492,7 @@ entry_ir_ir(htonr_ui) entry_ir_ir(ntohr_ui) entry_ir_ir(htonr_ul) entry_ir_ir(ntohr_ul) #endif entry_ir_ir(htonr) entry_ir_ir(ntohr) +entry_ir_ir_ir(movnr) entry_ir_ir_ir(movzr) entry_ir_ir(ldr_c) entry_ir_pm(ldi_c) entry_ir_ir(ldr_uc) entry_ir_pm(ldi_uc) entry_ir_ir(ldr_s) entry_ir_pm(ldi_s) diff --git a/check/movzr.ok b/check/movzr.ok new file mode 100644 index 0000000..9766475 --- /dev/null +++ b/check/movzr.ok @@ -0,0 +1 @@ +ok diff --git a/check/movzr.tst b/check/movzr.tst new file mode 100644 index 0000000..baa3ff8 --- /dev/null +++ b/check/movzr.tst @@ -0,0 +1,62 @@ +.data 8 +ok: +.c "ok\n" + +#define CMOVR(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R1 I0 \ + movi %R2 I1 \ + movi %R0 V \ + OP##r##T %R0 %R1 %R2 \ + beqi OP##T##N##r_##R0##R1##R2 %R0 V \ + calli @abort \ +OP##T##N##r_##R0##R1##R2: + +/* reg0 = reg1 op reg0 */ +#define CMOVR1(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I1 \ + movi %R1 I0 \ + movi %R2 V \ + OP##r##T %R0 %R1 %R0 \ + beqr OP##T##N##r_1##R0##R1##R2 %R0 %R2 \ + calli @abort \ +OP##T##N##r_1##R0##R1##R2: + +#define TEST_CMOV1(N, OP, I0, I1, V, R0, R1, R2) \ + CMOVR(N, , OP, I0, I1, V, R0, R1, R2) \ + CMOVR1(N, , OP, I0, I1, V, R0, R1, R2) \ + +#define TEST_CMOV(N, OP, I0, I1, V) \ + TEST_CMOV1(N, OP, I0, I1, V, v0, v1, v2) \ + TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r0) \ + TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r1) \ + TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r2) \ + TEST_CMOV1(N, OP, I0, I1, V, v1, v2, r1) \ + TEST_CMOV1(N, OP, I0, I1, V, v1, v2, r2) \ + TEST_CMOV1(N, OP, I0, I1, V, v2, r0, r1) \ + TEST_CMOV1(N, OP, I0, I1, V, v2, r0, r2) \ + TEST_CMOV1(N, OP, I0, I1, V, r0, r1, r2) + +#define MOVZR(N, I0, I1, V) TEST_CMOV(N, movz, I0, I1, V) +#define MOVNR(N, I0, I1, V) TEST_CMOV(N, movn, I0, I1, V) + +.code + prolog + + MOVZR(0, 0x0, 0x0, 0x0) + MOVZR(1, 0xf7de, 0x0, 0xf7de) + + MOVZR(2, 0x0, 0xdead, 0xdead) + MOVZR(3, 0xf7de, 0xdead, 0xdead) + + MOVNR(0, 0x0, 0x0, 0x0) + MOVNR(1, 0xf7de, 0x0, 0x0) + + MOVNR(2, 0x0, 0xdead, 0x0) + MOVNR(3, 0xf7de, 0xdead, 0xf7de) + + prepare + pushargi ok + ellipsis + finishi @printf + ret + epilog diff --git a/doc/body.texi b/doc/body.texi index 5593dad..48bfb9e 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -248,6 +248,8 @@ lshr O1 = O2 << O3 lshi O1 = O2 << O3 rshr _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} rshi _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} +movzr O1 = O3 ? O1 : O2 +movnr O1 = O3 ? O2 : O1 @end example @item Four operand binary ALU operations diff --git a/include/lightning.h.in b/include/lightning.h.in index e1d8a0a..422fc13 100644 --- a/include/lightning.h.in +++ b/include/lightning.h.in @@ -891,6 +891,10 @@ typedef enum { #define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v) #define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) +#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w) +#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) + jit_code_movnr, jit_code_movzr, + jit_code_last_code } jit_code_t; diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index 5829464..53698b0 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -290,6 +290,7 @@ typedef union { # define A64_CBNZ 0x35000000 # define A64_B_C 0x54000000 # define A64_CSINC 0x1a800400 +# define A64_CSSEL 0x1a800000 # define A64_REV 0xdac00c00 # define A64_UDIV 0x1ac00800 # define A64_SDIV 0x1ac00c00 @@ -461,6 +462,7 @@ typedef union { # define LDPI_PRE(Rt,Rt2,Rn,Simm7) oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7) # define STPI_POS(Rt,Rt2,Rn,Simm7) oxxx7(A64_STP_POS|XS,Rt,Rt2,Rn,Simm7) # define CSET(Rd,Cc) CSINC(Rd,XZR_REGNO,XZR_REGNO,Cc) +# define CSEL(Rd,Rn,Rm,Cc) oxxxc(A64_CSSEL|XS,Rd,Rn,Rm,Cc) # define B(Simm26) o26(A64_B,Simm26) # define BL(Simm26) o26(A64_BL,Simm26) # define BR(Rn) o_x_(A64_BR,Rn) @@ -572,6 +574,10 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define rshr_u(r0,r1,r2) LSR(r0,r1,r2) # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) MVN(r0,r1) # define andr(r0,r1,r2) AND(r0,r1,r2) @@ -1376,6 +1382,20 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPI(r2, 0); + CSEL(r0, r0, r1, CC_NE); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPI(r2, 0); + CSEL(r0, r0, r1, CC_EQ); +} + +static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c index 7e22e0e..4fa7a42 100644 --- a/lib/jit_aarch64-sz.c +++ b/lib/jit_aarch64-sz.c @@ -399,4 +399,6 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 8, /* movnr */ + 8, /* movzr */ #endif /* __WORDSIZE */ diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index 5b2ff49..369408c 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -1135,6 +1135,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); case_rr(mov,); + case_rrr(movn,); + case_rrr(movz,); case jit_code_movi: if (node->flag & jit_flag_node) { temp = node->v.n; diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index f0f5111..c4a550d 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -843,6 +843,10 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define comr(r0,r1) _comr(_jit,r0,r1) static void _comr(jit_state_t*,jit_int32_t,jit_int32_t); # define negr(r0,r1) _negr(_jit,r0,r1) @@ -1583,6 +1587,35 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } static void +_movznr(jit_state_t *_jit, int ct, jit_int32_t r0, + jit_int32_t r1, jit_int32_t r2) +{ + if (jit_thumb_p()) { + if (r2 < 7) + T1_CMPI(r2, 0); + else + T2_CMPI(r2, 0); + IT(ct); + T1_MOV(r0, r1); + } else { + CMPI(r2, 0); + CC_MOV(ct, r0, r1); + } +} + +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + _movznr(_jit, ARM_CC_NE, r0, r1, r2); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + _movznr(_jit, ARM_CC_EQ, r0, r1, r2); +} + +static void _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { if (jit_thumb_p()) { diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c index 9f0d012..6368c13 100644 --- a/lib/jit_arm-sz.c +++ b/lib/jit_arm-sz.c @@ -804,5 +804,7 @@ 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 8, /* movnr */ + 8, /* movzr */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 783fa90..051f84d 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1503,6 +1503,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _s); case_rr(ext, _us); case_rr(mov,); + case_rrr(movn,); + case_rrr(movz,); case jit_code_movi: if (node->flag & jit_flag_node) { temp = node->v.n; diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index d844c26..6541a7b 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -391,6 +391,7 @@ static void _nop(jit_state_t*,jit_int32_t); # define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR) # endif # define J(i0) hi(MIPS_J,i0) +# define MOVN(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVN) # define MOVZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVZ) # define comr(r0,r1) xori(r0,r1,-1) # define negr(r0,r1) subr(r0,_ZERO_REGNO,r1) @@ -506,6 +507,8 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) MOVN(r0, r1, r2) +# define movzr(r0,r1,r2) MOVZ(r0, r1, r2) # define ldr_c(r0,r1) LB(r0,0,r1) # define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index 613aa00..b33fef2 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -1207,4 +1207,6 @@ 0, /* movi_d_ww */ 4, /* movr_d_w */ 12, /* movi_d_w */ + 4, /* movnr */ + 4, /* movzr */ #endif /* __WORDSIZE */ diff --git a/lib/jit_mips.c b/lib/jit_mips.c index dafade8..5ffad2b 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -1428,6 +1428,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 9e99771..0046a05 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -505,6 +505,10 @@ static void _nop(jit_state_t*,jit_int32_t); static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); # define movi(r0,i0) _movi(_jit,r0,i0) static void _movi(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); # define negr(r0,r1) NEG(r0,r1) @@ -1120,6 +1124,22 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPWI(r2, 0); + BEQ(8); + MR(r0, r1); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPWI(r2, 0); + BNE(8); + MR(r0, r1); +} + static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index 788ac45..28251b4 100644 --- a/lib/jit_ppc-sz.c +++ b/lib/jit_ppc-sz.c @@ -401,6 +401,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 12, /* movnr */ + 12, /* movzr */ #endif /* _CALL_SYV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -809,6 +811,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 12, /* movnr */ + 12, /* movzr */ #endif /* _CALL_AIX */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ @@ -1216,6 +1220,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 12, /* movnr */ + 12, /* movzr */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1622,6 +1628,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 12, /* movnr */ + 12, /* movzr */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 0826f4e..d05d4b1 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -1358,6 +1358,8 @@ _emit_code(jit_state_t *_jit) # endif case_rr(neg,); case_rr(com,); + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 547f36c..6dcf672 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -369,6 +369,10 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t); static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t); # define movsr_u(r0, r1) _movsr_u(_jit, r0, r1) static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t); +#define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2) +static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2) +static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # if __X64 && !__X64_32 # define movir(r0, r1) _movir(_jit, r0, r1) static void _movir(jit_state_t*,jit_int32_t,jit_int32_t); @@ -698,6 +702,7 @@ static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t); # define ffsl(l) __builtin_ffsl(l) # endif # endif +# define jit_cmov_p() jit_cpu.cmov #endif #if CODE @@ -2213,6 +2218,32 @@ _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) mrm(0x03, r7(r0), r7(r1)); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(jit_cmov_p()); + + testr(r2, r2); + + rex(0, WIDE, r0, _NOREG, r1); + ic(0x0f); + ic(0x45); + mrm(0x03, r7(r0), r7(r1)); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(jit_cmov_p()); + + testr(r2, r2); + + rex(0, WIDE, r0, _NOREG, r1); + ic(0x0f); + ic(0x44); + mrm(0x03, r7(r0), r7(r1)); +} + #if __X64 static void _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index 663b840..2cf8880 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -399,6 +399,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* movnr */ + 7, /* movzr */ #endif #if __X64 @@ -802,6 +804,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* movnr */ + 7, /* movzr */ #else # if __X64_32 @@ -1204,6 +1208,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* movnr */ + 7, /* movzr */ # else #define JIT_INSTR_MAX 115 @@ -1605,6 +1611,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* movnr */ + 7, /* movzr */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 7dd900e..133ee39 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -1674,6 +1674,8 @@ _emit_code(jit_state_t *_jit) case_rrw(gt, _u); case_rrr(ne,); case_rrw(ne,); + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/lib/lightning.c b/lib/lightning.c index 0ed663d..15fc23d 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1435,6 +1435,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_unordi_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl; break; + case jit_code_movnr: case jit_code_movzr: case jit_code_addr: case jit_code_addxr: case jit_code_addcr: case jit_code_subr: case jit_code_subxr: case jit_code_subcr: case jit_code_mulr: case jit_code_divr: case jit_code_divr_u: |