diff --git a/check/.gitignore b/check/.gitignore index 470aeaa..a0047bb 100644 --- a/check/.gitignore +++ b/check/.gitignore @@ -4,6 +4,7 @@ nodata *.trs 3to2 +bswap add align allocai diff --git a/check/Makefile.am b/check/Makefile.am index f1155d7..fc9f232 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -65,6 +65,7 @@ EXTRA_DIST = \ ldstxi-c.tst ldstxi-c.ok \ cvt.tst cvt.ok \ hton.tst hton.ok \ + bswap.tst bswap.ok \ branch.tst branch.ok \ alu.inc \ alu_add.tst alu_add.ok \ @@ -117,7 +118,7 @@ base_TESTS = \ ldstr ldsti \ ldstxr ldstxi \ ldstr-c ldstxr-c ldstxi-c \ - cvt hton branch \ + cvt hton bswap branch \ alu_add alux_add \ alu_sub alux_sub alu_rsb \ alu_mul alu_div alu_rem \ @@ -196,7 +197,7 @@ arm_TESTS = \ rpn.arm ldstr.arm ldsti.arm \ ldstxr.arm ldstxi.arm \ ldstr-c.arm ldstxr-c.arm ldstxi-c.arm \ - cvt.arm hton.arm branch.arm \ + cvt.arm hton.arm bswap.arm branch.arm \ alu_add.arm alux_add.arm \ alu_sub.arm alux_sub.arm alu_rsb.arm \ alu_mul.arm alu_div.arm alu_rem.arm \ @@ -222,7 +223,7 @@ swf_TESTS = \ rpn.swf ldstr.swf ldsti.swf \ ldstxr.swf ldstxi.swf \ ldstr-c.swf ldstxr-c.swf ldstxi-c.swf \ - cvt.swf hton.swf branch.swf \ + cvt.swf hton.swf bswap.swf branch.swf \ alu_add.swf alux_add.swf \ alu_sub.swf alux_sub.swf alu_rsb.swf \ alu_mul.swf alu_div.swf alu_rem.swf \ @@ -246,7 +247,7 @@ arm_swf_TESTS = \ rpn.arm.swf ldstr.arm.swf ldsti.arm.swf \ ldstxr.arm.swf ldstxi.arm.swf \ ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf \ - cvt.arm.swf hton.arm.swf branch.arm.swf \ + cvt.arm.swf hton.arm.swf bswap.arm.swf branch.arm.swf \ alu_add.arm.swf alux_add.arm.swf \ alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf \ alu_mul.arm.swf alu_div.arm.swf alu_rem.arm.swf \ @@ -271,8 +272,8 @@ arm4_swf_TESTS = \ rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf \ ldstxr.arm4.swf ldstxi.arm4.swf \ ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf \ - cvt.arm4.swf hton.arm4.swf branch.arm4.swf \ - alu_add.arm4.swf alux_add.arm4.swf \ + cvt.arm4.swf 
hton.arm4.swf bswap.arm4.swf \ + branch.arm4.swf alu_add.arm4.swf alux_add.arm4.swf \ alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf \ alu_mul.arm4.swf alu_div.arm4.swf alu_rem.arm4.swf \ alu_and.arm4.swf alu_or.arm4.swf alu_xor.arm4.swf \ diff --git a/check/bswap.ok b/check/bswap.ok new file mode 100644 index 0000000..9766475 --- /dev/null +++ b/check/bswap.ok @@ -0,0 +1 @@ +ok diff --git a/check/bswap.tst b/check/bswap.tst new file mode 100644 index 0000000..f123e95 --- /dev/null +++ b/check/bswap.tst @@ -0,0 +1,154 @@ +.data 16 +ok: +.c "ok\n" + +#define us12_i 0x1234 +#define us7f_i 0x7ff7 +#define us80_i 0x8008 +#define usff_i 0xffff +#define ui12_i 0x01234567 +#define ui7f_i 0x7f7ff7f7 +#define ui80_i 0x80800808 +#define uiff_i 0xffffffff +#define ul12_i 0x0123456789abcdef +#define ul7f_i 0x7f7f7f7ff7f7f7f7 +#define ul80_i 0x8080808008080808 +#define ulff_i 0xffffffffffffffff + +#if __WORDSIZE == 32 +# define xus12_i 0xffff1234 +# define xus7f_i 0x10107ff7 +# define xus80_i 0x81188008 +# define xusff_i 0xeaaeffff +#else +# define xus12_i 0xffffffffffff1234 +# define xus7f_i 0x1010100101017ff7 +# define xus80_i 0x8181811818818008 +# define xusff_i 0xeaeaeaaeaeaeffff +# define xui12_i 0xffffffff01234567 +# define xui7f_i 0x101001017f7ff7f7 +# define xui80_i 0x8181181880800808 +# define xuiff_i 0xeaeaaeaeffffffff +#endif + +# define us12_o 0x3412 +# define us7f_o 0xf77f +# define us80_o 0x0880 +# define usff_o 0xffff +# define ui12_o 0x67452301 +# define ui7f_o 0xf7f77f7f +# define ui80_o 0x08088080 +# define uiff_o 0xffffffff +# define ul12_o 0xefcdab8967452301 +# define ul7f_o 0xf7f7f7f77f7f7f7f +# define ul80_o 0x0808080880808080 +# define ulff_o 0xffffffffffffffff + +#define BSWAP4(I, O, T, R0, R1) \ + movi %R0 I \ + bswapr_##T %R1 %R0 \ + beqi T##R0##R1##I %R1 O \ + calli @abort \ +T##R0##R1##I: + +#define BSWAP3(T, R0, R1) \ + BSWAP4(T##12_i, T##12_o, T, R0, R1) \ + BSWAP4(x##T##12_i, T##12_o, T, R0, R1) \ + BSWAP4(T##7f_i, T##7f_o, T, R0, R1) \ + 
BSWAP4(x##T##7f_i, T##7f_o, T, R0, R1) \ + BSWAP4(T##80_i, T##80_o, T, R0, R1) \ + BSWAP4(x##T##80_i, T##80_o, T, R0, R1) \ + BSWAP4(T##ff_i, T##ff_o, T, R0, R1) \ + BSWAP4(x##T##ff_i, T##ff_o, T, R0, R1) + +#define BSWAP3x(T, R0, R1) \ + BSWAP4(T##12_i, T##12_o, T, R0, R1) \ + BSWAP4(T##7f_i, T##7f_o, T, R0, R1) \ + BSWAP4(T##80_i, T##80_o, T, R0, R1) \ + BSWAP4(T##ff_i, T##ff_o, T, R0, R1) + +#define BSWAP2(T, V0, V1, V2, R0, R1, R2) \ + BSWAP3(T, V0, V0) \ + BSWAP3(T, V0, V1) \ + BSWAP3(T, V0, V2) \ + BSWAP3(T, V0, R0) \ + BSWAP3(T, V0, R1) \ + BSWAP3(T, V0, R2) \ + +#define BSWAP2x(T, V0, V1, V2, R0, R1, R2) \ + BSWAP3x(T, V0, V0) \ + BSWAP3x(T, V0, V1) \ + BSWAP3x(T, V0, V2) \ + BSWAP3x(T, V0, R0) \ + BSWAP3x(T, V0, R1) \ + BSWAP3x(T, V0, R2) \ + +#define BSWAP1(T, V0, V1, V2, R0, R1, R2) \ + BSWAP2(T, V0, V1, V2, R0, R1, R2) \ + BSWAP2(T, V1, V2, R0, R1, R2, V0) \ + BSWAP2(T, V2, R0, R1, R2, V0, V1) \ + BSWAP2(T, R0, R1, R2, V0, V1, V2) \ + BSWAP2(T, R1, R2, V0, V1, V2, R0) \ + BSWAP2(T, R2, V0, V1, V2, R0, R1) + +#define BSWAP1x(T, V0, V1, V2, R0, R1, R2) \ + BSWAP2x(T, V0, V1, V2, R0, R1, R2) \ + BSWAP2x(T, V1, V2, R0, R1, R2, V0) \ + BSWAP2x(T, V2, R0, R1, R2, V0, V1) \ + BSWAP2x(T, R0, R1, R2, V0, V1, V2) \ + BSWAP2x(T, R1, R2, V0, V1, V2, R0) \ + BSWAP2x(T, R2, V0, V1, V2, R0, R1) + +#if __WORDSIZE == 32 +# define BSWAP(V0, V1, V2, R0, R1, R2) \ + BSWAP1(us, V0, V1, V2, R0, R1, R2) \ + BSWAP1x(ui, V0, V1, V2, R0, R1, R2) +#else +# define BSWAP(V0, V1, V2, R0, R1, R2) \ + BSWAP1(us, V0, V1, V2, R0, R1, R2) \ + BSWAP1(ui, V0, V1, V2, R0, R1, R2) \ + BSWAP1x(ul, V0, V1, V2, R0, R1, R2) +#endif + +.code + prolog + /* simple sequence for easier disassembly reading and encoding check */ + movi %r0 us12_i + bswapr_us %r1 %r0 + beqi us %r1 us12_o + calli @abort +us: + + movi %r0 xus12_i + bswapr_us %r1 %r0 + beqi xus %r1 us12_o + calli @abort +xus: + movi %r0 ui12_i + bswapr_ui %r1 %r0 + beqi ui %r1 ui12_o + calli @abort +ui: +#if __WORDSIZE == 64 + movi %r0 
xui12_i + bswapr_ui %r1 %r0 + beqi xui %r1 ui12_o + calli @abort +xui: + movi %r0 ul12_i + bswapr_ul %r1 %r0 + beqi ul %r1 ul12_o + calli @abort +ul: +#endif + + BSWAP(v0, v1, v2, r0, r1, r2) + + // just to know did not abort + prepare + pushargi ok + ellipsis + finishi @printf + + ret + epilog diff --git a/check/lightning.c b/check/lightning.c index 9798a5b..3cf3e70 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -328,6 +328,11 @@ static void htonr_ui(void); static void ntohr_ui(void); static void htonr_ul(void); static void ntohr_ul(void); #endif static void htonr(void); static void ntohr(void); +static void bswapr_us(void); static void bswapr_ui(void); +#if __WORDSIZE == 64 +static void bswapr_ul(void); +#endif +static void bswapr(void); static void movnr(void); static void movzr(void); static void ldr_c(void); static void ldi_c(void); static void ldr_uc(void); static void ldi_uc(void); @@ -643,6 +648,11 @@ static instr_t instr_vector[] = { entry(htonr_ul), entry(ntohr_ul), #endif entry(htonr), entry(ntohr), + entry(bswapr_us), entry(bswapr_ui), +#if __WORDSIZE == 64 + entry(bswapr_ul), +#endif + entry(bswapr), entry(movnr), entry(movzr), entry(ldr_c), entry(ldi_c), entry(ldr_uc), entry(ldi_uc), @@ -1492,6 +1502,11 @@ entry_ir_ir(htonr_ui) entry_ir_ir(ntohr_ui) entry_ir_ir(htonr_ul) entry_ir_ir(ntohr_ul) #endif entry_ir_ir(htonr) entry_ir_ir(ntohr) +entry_ir_ir(bswapr_us) entry_ir_ir(bswapr_ui) +#if __WORDSIZE == 64 +entry_ir_ir(bswapr_ul) +#endif +entry_ir_ir(bswapr) entry_ir_ir_ir(movnr) entry_ir_ir_ir(movzr) entry_ir_ir(ldr_c) entry_ir_pm(ldi_c) entry_ir_ir(ldr_uc) entry_ir_pm(ldi_uc) diff --git a/doc/body.texi b/doc/body.texi index 48bfb9e..c174fcf 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -372,6 +372,14 @@ htonr _us _ui _ul @r{Host-to-network (big endian) order} ntohr _us _ui _ul @r{Network-to-host order } @end example +@code{bswapr} can be used to unconditionally byte-swap an operand. 
+On little-endian architectures, @code{htonr} and @code{ntohr} resolve +to this. +The @code{_ul} variant is only available on 64-bit architectures. +@example +bswapr _us _ui _ul O1 = byte_swap(O2) +@end example + @item Load operations @code{ld} accepts two operands while @code{ldx} accepts three; in both cases, the last can be either a register or an immediate diff --git a/include/lightning.h.in b/include/lightning.h.in index 50c6ee8..2c540cb 100644 --- a/include/lightning.h.in +++ b/include/lightning.h.in @@ -896,6 +896,18 @@ typedef enum { #define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v) #define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) +#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v) + jit_code_bswapr_us, +#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) + jit_code_bswapr_ui, +#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) + jit_code_bswapr_ul, +#if __WORDSIZE == 32 +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) +#else +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) +#endif + jit_code_last_code } jit_code_t; diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index 53698b0..7d2a99d 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -663,17 +663,11 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxr_l(r0,r1,r2) STR(r2,r1,r0) # define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) REV(r0,r1) -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define 
bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ul(r0,r1) REV(r0,r1) # define extr_c(r0,r1) SXTB(r0,r1) # define extr_uc(r0,r1) UXTB(r0,r1) # define extr_s(r0,r1) SXTH(r0,r1) @@ -1461,21 +1455,19 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } -#if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - htonr_ul(r0, r1); + bswapr_ul(r0, r1); rshi_u(r0, r0, 48); } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - htonr_ul(r0, r1); + bswapr_ul(r0, r1); rshi_u(r0, r0, 32); } -#endif static void _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c index 33a0410..e1f6d96 100644 --- a/lib/jit_aarch64-sz.c +++ b/lib/jit_aarch64-sz.c @@ -401,4 +401,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 8, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index 369408c..f0be046 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -1128,6 +1128,9 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index d8ca34a..2dd701d 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -626,18 +626,12 @@ static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t); static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) static void 
_extr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ul(r0,r1) _bswapr_ul(_jit,r0,r1) +static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); # define jmpr(r0) JMP(_R31_REGNO,r0,0) # define jmpi(i0) _jmpi(_jit,i0) static void _jmpi(jit_state_t*, jit_word_t); @@ -2475,7 +2469,7 @@ _extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; t0 = jit_get_reg(jit_class_gpr); @@ -2487,7 +2481,7 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; jit_int32_t t1; @@ -2513,7 +2507,7 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; jit_int32_t t1; diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c index f37b748..ecfeba3 100644 --- a/lib/jit_alpha-sz.c +++ b/lib/jit_alpha-sz.c @@ -401,4 +401,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w 
*/ + 16, /* bswapr_us */ + 36, /* bswapr_ui */ + 36, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index 4957f1a..d7bb3ec 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -1086,6 +1086,9 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index c4a550d..14ba36b 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -1095,15 +1095,10 @@ static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxi_i(r0,r1,i0) _stxi_i(_jit,r0,r1,i0) static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0,r1) _extr_uc(_jit,r0,r1) @@ -3609,11 +3604,9 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; if (jit_thumb_p()) { if ((r0|r1) < 8) T1_REV(r0, r1); @@ -3627,20 +3620,14 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, 
jit_int32_t r1) rshi_u(r0, r0, 16); } else { - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); + generic_bswapr_us(_jit, r0, r1); } } } /* inline glibc htonl (without register clobber) */ static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (jit_thumb_p()) { @@ -3662,7 +3649,6 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } } } -#endif static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c index 008e6f9..293d306 100644 --- a/lib/jit_arm-sz.c +++ b/lib/jit_arm-sz.c @@ -402,6 +402,9 @@ 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 8, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ @@ -808,5 +811,8 @@ 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 051f84d..0fdd1a7 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1498,6 +1498,8 @@ _emit_code(jit_state_t *_jit) case_wrr(stx, _i); case_rr(hton, _us); case_rr(hton, _ui); + case_rr(bswap, _us); + case_rr(bswap, _ui); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index e304ee0..6ca54f3 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -658,12 +658,8 @@ static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define extr_uc(r0,r1) EXTRWR_U(r1,31,8,r0) #define extr_s(r0,r1) EXTRWR(r1,31,16,r0) #define extr_us(r0,r1) EXTRWR_U(r1,31,16,r0) -#if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) movr(r0,r1) -#else -# error need htonr implementation 
-#endif +#define bswapr_us(r0,r1) generic_bswapr_us(_jit,r0,r1) +#define bswapr_ui(r0,r1) generic_bswapr_ui(_jit,r0,r1) #define addr(r0,r1,r2) ADD(r1,r2,r0) #define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c index 544926f..1bfb7e6 100644 --- a/lib/jit_hppa-sz.c +++ b/lib/jit_hppa-sz.c @@ -401,4 +401,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 36, /* bswapr_us */ + 80, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index 8d22ede..2668842 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -1054,6 +1054,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(hton, _us); case_rr(hton, _ui); + case_rr(bswap, _us); + case_rr(bswap, _ui); case_rrr(lt,); case_rrw(lt,); case_rrr(lt, _u); diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 9953875..63bb92d 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -1311,17 +1311,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); -#if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) MUX1(r0,r1,MUX_REV) -#else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -#endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ul(r0,r1) 
MUX1(r0,r1,MUX_REV) #define extr_c(r0,r1) SXT1(r0,r1) #define extr_uc(r0,r1) ZXT1(r0,r1) #define extr_s(r0,r1) SXT2(r0,r1) @@ -3506,6 +3500,20 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } static void +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + bswapr_ul(r0, r1); + rshi_u(r0, r0, 48); +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + bswapr_ul(r0, r1); + rshi_u(r0, r0, 32); +} + +static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; @@ -3971,48 +3979,6 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } -#if __BYTE_ORDER == __LITTLE_ENDIAN -static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} -#endif - static void _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c index 7906c7b..c81b3ea 100644 --- a/lib/jit_ia64-sz.c +++ b/lib/jit_ia64-sz.c @@ -68,9 +68,9 @@ 16, /* lshr */ 16, /* lshi */ 16, /* rshr */ - 16, /* rshi */ + 32, /* rshi */ 16, /* rshr_u */ - 16, /* rshi_u */ + 32, /* rshi_u */ 16, 
/* negr */ 16, /* comr */ 32, /* ltr */ @@ -103,8 +103,8 @@ 16, /* extr_us */ 16, /* extr_i */ 16, /* extr_ui */ - 64, /* htonr_us */ - 160, /* htonr_ui */ + 48, /* htonr_us */ + 48, /* htonr_ui */ 16, /* htonr_ul */ 16, /* ldr_c */ 32, /* ldi_c */ @@ -401,4 +401,7 @@ 0, /* movi_d_ww */ 16, /* movr_d_w */ 32, /* movi_d_w */ + 48, /* bswapr_us */ + 48, /* bswapr_ui */ + 16, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index c388907..8b4cd00 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -1198,6 +1198,9 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 57b0c88..0625589 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -607,23 +607,12 @@ static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # endif -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# endif -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define 
bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1) # endif # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); @@ -1765,33 +1754,20 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } #endif -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; - if (jit_mips2_p()) { extr_us(r0, r1); WSBH(r0, r0); } else { - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); + generic_bswapr_us(_jit, r0, r1); } } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - if (jit_mips2_p()) { if (__WORDSIZE == 64) { SLL(r0, r1, 0); @@ -1803,45 +1779,11 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) ROTR(r0, r0, 16); } } else { - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); + generic_bswapr_ui(_jit, r0, r1); } } static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); - lshi(r0, r0, 32); - orr(r0, r0, rn(reg)); - jit_unget_reg(reg); -} -# endif - -static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1) { if (jit_mips2_p()) diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index da37848..b4642fa 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -1,7 +1,7 @@ #if __WORDSIZE == 32 #if NEW_ABI -#define JIT_INSTR_MAX 44 +#define JIT_INSTR_MAX 52 0, /* data */ 0, /* live */ 0, /* align */ @@ -402,6 +402,9 @@ 0, /* movi_d_ww */ 4, /* movr_d_w */ 12, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -808,11 +811,14 @@ 8, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 44 +#define JIT_INSTR_MAX 116 0, /* data */ 0, /* live */ 4, /* align */ @@ -1213,4 +1219,7 @@ 0, /* movi_d_ww */ 4, /* movr_d_w */ 12, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/lib/jit_mips.c b/lib/jit_mips.c index 5ffad2b..94fe797 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -1420,6 +1420,11 @@ _emit_code(jit_state_t *_jit) #if __WORDSIZE == 64 case_rr(hton, _ul); #endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); +#endif case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_names.c b/lib/jit_names.c index cc640e8..ebd3d56 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -228,4 +228,6 @@ static char *code_name[] = { "movr_f_w", "movi_f_w", "movr_d_ww", "movi_d_ww", "movr_d_w", "movi_d_w", + "bswapr_us", + "bswapr_ui", "bswapr_ul", }; diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 0046a05..0517692 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -521,23 +521,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); # define extr_i(r0,r1) EXTSW(r0,r1) # define extr_ui(r0,r1) CLRLDI(r0,r1,32) # endif -# if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) -# 
if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif -# else -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# endif +# define bswapr_us(r0,r1) generic_bswapr_us(_jit,r0,r1) +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1) # endif # define addr(r0,r1,r2) ADD(r0,r1,r2) # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -1158,22 +1146,8 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (word); } -# if __BYTE_ORDER == __LITTLE_ENDIAN -static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); @@ -1188,22 +1162,6 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(reg); } -# if __WORDSIZE == 64 -static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); - lshi(r0, r0, 32); - orr(r0, r0, rn(reg)); - jit_unget_reg(reg); -} -# endif -# endif - static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git 
a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index 817af11..0be7047 100644 --- a/lib/jit_ppc-sz.c +++ b/lib/jit_ppc-sz.c @@ -403,6 +403,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* _CALL_SYV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -813,6 +816,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* _CALL_AIX */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ @@ -1222,6 +1228,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1630,6 +1639,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index d05d4b1..e94d1a5 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -1356,6 +1356,11 @@ _emit_code(jit_state_t *_jit) # if __WORDSIZE == 64 case_rr(hton, _ul); # endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +# if __WORDSIZE == 64 + case_rr(bswap, _ul); +# endif case_rr(neg,); case_rr(com,); case_rrr(movn,); diff --git a/lib/jit_riscv-cpu.c b/lib/jit_riscv-cpu.c index b65ca5c..9f029c0 100644 --- a/lib/jit_riscv-cpu.c +++ b/lib/jit_riscv-cpu.c @@ -434,12 +434,9 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define 
htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) +# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) +# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) # define extr_c(r0, r1) _extr_c(_jit, r0, r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0, r1) andi(r0, r1, 0xff) @@ -1248,59 +1245,6 @@ DEFST(i, W) DEFST(l, D) static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} - -static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi_u(rn(t0), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(t0), rn(t0)); - lshi(r0, r0, 32); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { lshi(r0, r1, 56); diff --git a/lib/jit_riscv-sz.c b/lib/jit_riscv-sz.c index 8c70fcc..c8908d8 100644 --- a/lib/jit_riscv-sz.c +++ b/lib/jit_riscv-sz.c @@ -400,4 +400,7 @@ 0, /* movi_d_ww */ 4, /* movr_d_w */ 16, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui 
*/ + 116, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c index c2fcca4..1dc3c9e 100644 --- a/lib/jit_riscv.c +++ b/lib/jit_riscv.c @@ -1125,6 +1125,9 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c index 4fc39a8..619ab15 100644 --- a/lib/jit_s390-cpu.c +++ b/lib/jit_s390-cpu.c @@ -966,6 +966,9 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) +# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) +# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) # define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) @@ -1083,13 +1086,6 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif # define extr_c(r0,r1) LGBR(r0,r1) # define extr_uc(r0,r1) LLGCR(r0,r1) # define extr_s(r0,r1) LGHR(r0,r1) diff --git a/lib/jit_s390-sz.c b/lib/jit_s390-sz.c index 96e6b75..bb9071d 100644 --- a/lib/jit_s390-sz.c +++ b/lib/jit_s390-sz.c @@ -1,6 +1,6 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 104 +#define JIT_INSTR_MAX 128 0, /* data */ 0, /* live */ 6, /* align */ @@ -401,10 +401,13 @@ 0, /* 
movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 52, /* bswapr_us */ + 128, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 104 +#define JIT_INSTR_MAX 344 0, /* data */ 0, /* live */ 6, /* align */ @@ -805,4 +808,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 68, /* bswapr_us */ + 160, /* bswapr_ui */ + 344, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/lib/jit_s390.c b/lib/jit_s390.c index aecc08a..4b89bea 100644 --- a/lib/jit_s390.c +++ b/lib/jit_s390.c @@ -1152,6 +1152,11 @@ _emit_code(jit_state_t *_jit) #if __WORDSIZE == 64 case_rr(hton, _ul); #endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); +#endif case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 2ac3f5d..90c3767 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -545,6 +545,9 @@ static void _movr(jit_state_t*, jit_int32_t, jit_int32_t); static void _movi(jit_state_t*, jit_int32_t, jit_word_t); # define movi_p(r0, i0) _movi_p(_jit, r0, i0) static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); +# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) +# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) +# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) # define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) @@ -673,7 +676,6 @@ static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define rshr_u(r0, r1, r2) SRLX(r1, r2, r0) # define rshi_u(r0, r1, i0) SRLXI(r1, i0, r0) # endif -# define htonr_us(r0,r1) extr_us(r0,r1) # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0,r1) andi(r0, r1, 0xff) @@ -681,11 +683,7 @@ static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); static void 
_extr_s(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_us(r0,r1) _extr_us(_jit,r0,r1) static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# if __WORDSIZE == 64 # define extr_i(r0,r1) _extr_i(_jit,r0,r1) static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index 4c905bf..5ec051d 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -1,5 +1,5 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 44 +#define JIT_INSTR_MAX 52 0, /* data */ 0, /* live */ 0, /* align */ @@ -400,10 +400,13 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 64 +#define JIT_INSTR_MAX 116 0, /* data */ 0, /* live */ 4, /* align */ @@ -804,4 +807,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index 84ff48c..23d4442 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -1464,6 +1464,11 @@ _emit_code(jit_state_t *_jit) #if __WORDSIZE == 64 case_rr(hton, _ul); #endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); +#endif case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 6dcf672..81534f0 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -379,13 +379,13 @@ static void _movir(jit_state_t*,jit_int32_t,jit_int32_t); # define movir_u(r0, r1) _movir_u(_jit, r0, r1) static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t); # endif -# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); 
-# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # if __X64 && !__X64_32 -#define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); +#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1) +static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); #endif # define extr_c(r0, r1) _extr_c(_jit, r0, r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); @@ -2263,7 +2263,7 @@ _movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #endif static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { extr_us(r0, r1); ic(0x66); @@ -2274,7 +2274,7 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { movr(r0, r1); rex(0, 0, _NOREG, _NOREG, r0); @@ -2284,7 +2284,7 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #if __X64 && !__X64_32 static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { movr(r0, r1); rex(0, 1, _NOREG, _NOREG, r0); diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index 745f110..bd4b9a0 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -401,6 +401,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif #if __X64 @@ -806,6 +809,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 6, /* bswapr_ul */ #else # if __X64_32 @@ -1210,6 +1216,9 @@ 0, /* movi_d_ww 
*/ 0, /* movr_d_w */ 0, /* movi_d_w */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 0, /* bswapr_ul */ # else #define JIT_INSTR_MAX 115 @@ -1613,6 +1622,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 6, /* bswapr_ul */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 133ee39..e3e1383 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -1699,6 +1699,11 @@ _emit_code(jit_state_t *_jit) #if __X64 && !__X64_32 case_rr(hton, _ul); #endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __X64 && !__X64_32 + case_rr(bswap, _ul); +#endif case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/lib/lightning.c b/lib/lightning.c index cc6887f..11f4069 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1380,6 +1380,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_truncr_f_i: case jit_code_truncr_f_l: case jit_code_truncr_d_i: case jit_code_truncr_d_l: case jit_code_htonr_us: case jit_code_htonr_ui: case jit_code_htonr_ul: + case jit_code_bswapr_us: case jit_code_bswapr_ui: case jit_code_bswapr_ul: case jit_code_ldr_c: case jit_code_ldr_uc: case jit_code_ldr_s: case jit_code_ldr_us: case jit_code_ldr_i: case jit_code_ldr_ui: case jit_code_ldr_l: case jit_code_negr_f: @@ -3491,6 +3492,31 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, } } +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define htonr_us(r0,r1) bswapr_us(r0,r1) +# define htonr_ui(r0,r1) bswapr_ui(r0,r1) +# if __WORDSIZE == 64 +# define htonr_ul(r0,r1) bswapr_ul(r0,r1) +# endif +#else +# define htonr_us(r0,r1) extr_us(r0,r1) +# if __WORDSIZE == 32 +# define htonr_ui(r0,r1) movr(r0,r1) +# else +# define htonr_ui(r0,r1) extr_ui(r0,r1) +# define htonr_ul(r0,r1) movr(r0,r1) +# endif +#endif + +static maybe_unused void +generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +static maybe_unused void 
+generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +#if __WORDSIZE == 64 +static maybe_unused void +generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +#endif + #if defined(__i386__) || defined(__x86_64__) # include "jit_x86.c" #elif defined(__mips__) @@ -3514,3 +3540,47 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, #elif defined(__riscv) # include "jit_riscv.c" #endif + +static maybe_unused void +generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + rshi(rn(reg), r1, 8); + andi(r0, r1, 0xff); + andi(rn(reg), rn(reg), 0xff); + lshi(r0, r0, 8); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +static maybe_unused void +generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + rshi(rn(reg), r1, 16); + bswapr_us(r0, r1); + bswapr_us(rn(reg), rn(reg)); + lshi(r0, r0, 16); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +#if __WORDSIZE == 64 +static maybe_unused void +generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + rshi_u(rn(reg), r1, 32); + bswapr_ui(r0, r1); + bswapr_ui(rn(reg), rn(reg)); + lshi(r0, r0, 32); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} +#endif |