Commit 4d2911b

TDecking authored and Amanieu committed

Rework SIMD zeroing

1 parent 91c0dab · commit 4d2911b (full SHA for 4d2911b can be copied from the commit page)

File tree

20 files changed

+1468

-2906

lines changed

library/stdarch/crates/core_arch/src

20 files changed

+1468

-2906

lines changed

`‎library/stdarch/crates/core_arch/src/simd.rs‎`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,9 @@ macro_rules! simd_ty {`
`10`	`10`
`11`	`11`	`#[allow(clippy::use_self)]`
`12`	`12`	`impl $id {`
	`13`	`+ /// A value of this type where all elements are zeroed out.`
	`14`	`+ pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };`
	`15`	`+`
`13`	`16`	`#[inline(always)]`
`14`	`17`	`pub(crate) const fn new($($param_name: $elem_type),*) -> Self {`
`15`	`18`	`$id([$($param_name),*])`

`‎library/stdarch/crates/core_arch/src/wasm32/simd128.rs‎`

Lines changed: 7 additions & 7 deletions

Original file line number	Diff line number	Diff line change
`@@ -2232,7 +2232,7 @@ pub fn v128_any_true(a: v128) -> bool {`
`2232`	`2232`	`pub fn i8x16_abs(a: v128) -> v128 {`
`2233`	`2233`	`unsafe {`
`2234`	`2234`	`let a = a.as_i8x16();`
`2235`		`- let zero = simd::i8x16::splat(0);`
	`2235`	`+ let zero = simd::i8x16::ZERO;`
`2236`	`2236`	`simd_select::<simd::m8x16, simd::i8x16>(simd_lt(a, zero), simd_sub(zero, a), a).v128()`
`2237`	`2237`	`}`
`2238`	`2238`	`}`
`@@ -2524,7 +2524,7 @@ pub use i16x8_extadd_pairwise_u8x16 as u16x8_extadd_pairwise_u8x16;`
`2524`	`2524`	`#[stable(feature = "wasm_simd", since = "1.54.0")]`
`2525`	`2525`	`pub fn i16x8_abs(a: v128) -> v128 {`
`2526`	`2526`	`let a = a.as_i16x8();`
`2527`		`- let zero = simd::i16x8::splat(0);`
	`2527`	`+ let zero = simd::i16x8::ZERO;`
`2528`	`2528`	`unsafe {`
`2529`	`2529`	`simd_select::<simd::m16x8, simd::i16x8>(simd_lt(a, zero), simd_sub(zero, a), a).v128()`
`2530`	`2530`	`}`
`@@ -3012,7 +3012,7 @@ pub use i32x4_extadd_pairwise_u16x8 as u32x4_extadd_pairwise_u16x8;`
`3012`	`3012`	`#[stable(feature = "wasm_simd", since = "1.54.0")]`
`3013`	`3013`	`pub fn i32x4_abs(a: v128) -> v128 {`
`3014`	`3014`	`let a = a.as_i32x4();`
`3015`		`- let zero = simd::i32x4::splat(0);`
	`3015`	`+ let zero = simd::i32x4::ZERO;`
`3016`	`3016`	`unsafe {`
`3017`	`3017`	`simd_select::<simd::m32x4, simd::i32x4>(simd_lt(a, zero), simd_sub(zero, a), a).v128()`
`3018`	`3018`	`}`
`@@ -3394,7 +3394,7 @@ pub use i32x4_extmul_high_u16x8 as u32x4_extmul_high_u16x8;`
`3394`	`3394`	`#[stable(feature = "wasm_simd", since = "1.54.0")]`
`3395`	`3395`	`pub fn i64x2_abs(a: v128) -> v128 {`
`3396`	`3396`	`let a = a.as_i64x2();`
`3397`		`- let zero = simd::i64x2::splat(0);`
	`3397`	`+ let zero = simd::i64x2::ZERO;`
`3398`	`3398`	`unsafe {`
`3399`	`3399`	`simd_select::<simd::m64x2, simd::i64x2>(simd_lt(a, zero), simd_sub(zero, a), a).v128()`
`3400`	`3400`	`}`
`@@ -4105,7 +4105,7 @@ pub fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {`
`4105`	`4105`	`let ret: simd::i32x4 = unsafe {`
`4106`	`4106`	`simd_shuffle!(`
`4107`	`4107`	`llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()),`
`4108`		`- simd::i32x2::splat(0),`
	`4108`	`+ simd::i32x2::ZERO,`
`4109`	`4109`	`[0, 1, 2, 3],`
`4110`	`4110`	`)`
`4111`	`4111`	`};`
`@@ -4129,7 +4129,7 @@ pub fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {`
`4129`	`4129`	`let ret: simd::i32x4 = unsafe {`
`4130`	`4130`	`simd_shuffle!(`
`4131`	`4131`	`llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()),`
`4132`		`- simd::i32x2::splat(0),`
	`4132`	`+ simd::i32x2::ZERO,`
`4133`	`4133`	`[0, 1, 2, 3],`
`4134`	`4134`	`)`
`4135`	`4135`	`};`
`@@ -4176,7 +4176,7 @@ pub fn f32x4_demote_f64x2_zero(a: v128) -> v128 {`
`4176`	`4176`	`unsafe {`
`4177`	`4177`	`simd_cast::<simd::f64x4, simd::f32x4>(simd_shuffle!(`
`4178`	`4178`	`a.as_f64x2(),`
`4179`		`- simd::f64x2::splat(0.0),`
	`4179`	`+ simd::f64x2::ZERO,`
`4180`	`4180`	`[0, 1, 2, 3]`
`4181`	`4181`	`))`
`4182`	`4182`	`.v128()`

`‎library/stdarch/crates/core_arch/src/x86/avx.rs‎`

Lines changed: 13 additions & 17 deletions

Original file line number	Diff line number	Diff line change
`@@ -515,7 +515,7 @@ pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {`
`515`	`515`	`#[cfg_attr(test, assert_instr(vblendvpd))]`
`516`	`516`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`517`	`517`	`pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {`
`518`		`- let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::splat(0));`
	`518`	`+ let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);`
`519`	`519`	`transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))`
`520`	`520`	`}`
`521`	`521`
`@@ -528,7 +528,7 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {`
`528`	`528`	`#[cfg_attr(test, assert_instr(vblendvps))]`
`529`	`529`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`530`	`530`	`pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {`
`531`		`- let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));`
	`531`	`+ let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);`
`532`	`532`	`transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))`
`533`	`533`	`}`
`534`	`534`
`@@ -983,11 +983,7 @@ pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {`
`983`	`983`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`984`	`984`	`pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {`
`985`	`985`	`static_assert_uimm_bits!(IMM1, 1);`
`986`		`- let dst: i64x2 = simd_shuffle!(`
`987`		`- a.as_i64x4(),`
`988`		`- _mm256_undefined_si256().as_i64x4(),`
`989`		`- [[0, 1], [2, 3]][IMM1 as usize],`
`990`		`- );`
	`986`	`+ let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);`
`991`	`987`	`transmute(dst)`
`992`	`988`	`}`
`993`	`989`
`@@ -2139,7 +2135,7 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {`
`2139`	`2135`	`pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {`
`2140`	`2136`	`// Propagate the highest bit to the rest, because simd_bitmask`
`2141`	`2137`	`// requires all-1 or all-0.`
`2142`		`- let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));`
	`2138`	`+ let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);`
`2143`	`2139`	`simd_bitmask::<i64x4, u8>(mask).into()`
`2144`	`2140`	`}`
`2145`	`2141`
`@@ -2155,7 +2151,7 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {`
`2155`	`2151`	`pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {`
`2156`	`2152`	`// Propagate the highest bit to the rest, because simd_bitmask`
`2157`	`2153`	`// requires all-1 or all-0.`
`2158`		`- let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));`
	`2154`	`+ let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);`
`2159`	`2155`	`simd_bitmask::<i32x8, u8>(mask).into()`
`2160`	`2156`	`}`
`2161`	`2157`
`@@ -2167,7 +2163,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {`
`2167`	`2163`	`#[cfg_attr(test, assert_instr(vxorp))]`
`2168`	`2164`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`2169`	`2165`	`pub unsafe fn _mm256_setzero_pd() -> __m256d {`
`2170`		`- _mm256_set1_pd(0.0)`
	`2166`	`+ const{ mem::zeroed()}`
`2171`	`2167`	`}`
`2172`	`2168`
`2173`	`2169`	`/// Returns vector of type __m256 with all elements set to zero.`
`@@ -2178,7 +2174,7 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d {`
`2178`	`2174`	`#[cfg_attr(test, assert_instr(vxorps))]`
`2179`	`2175`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`2180`	`2176`	`pub unsafe fn _mm256_setzero_ps() -> __m256 {`
`2181`		`- _mm256_set1_ps(0.0)`
	`2177`	`+ const{ mem::zeroed()}`
`2182`	`2178`	`}`
`2183`	`2179`
`2184`	`2180`	`/// Returns vector of type __m256i with all elements set to zero.`
`@@ -2189,7 +2185,7 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 {`
`2189`	`2185`	`#[cfg_attr(test, assert_instr(vxor))]`
`2190`	`2186`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`2191`	`2187`	`pub unsafe fn _mm256_setzero_si256() -> __m256i {`
`2192`		`- _mm256_set1_epi8(0)`
	`2188`	`+ const{ mem::zeroed()}`
`2193`	`2189`	`}`
`2194`	`2190`
`2195`	`2191`	`/// Sets packed double-precision (64-bit) floating-point elements in returned`
`@@ -2722,7 +2718,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {`
`2722`	`2718`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`2723`	`2719`	`pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {`
`2724`	`2720`	`let a = a.as_i64x2();`
`2725`		`- let undefined = _mm_undefined_si128().as_i64x2();`
	`2721`	`+ let undefined = i64x2::ZERO;`
`2726`	`2722`	`let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);`
`2727`	`2723`	`transmute(dst)`
`2728`	`2724`	`}`
`@@ -2752,7 +2748,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {`
`2752`	`2748`	`// instructions, thus it has zero latency.`
`2753`	`2749`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`2754`	`2750`	`pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {`
`2755`		`- let b = _mm_setzero_si128().as_i64x2();`
	`2751`	`+ let b = i64x2::ZERO;`
`2756`	`2752`	`let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);`
`2757`	`2753`	`transmute(dst)`
`2758`	`2754`	`}`
`@@ -2782,7 +2778,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {`
`2782`	`2778`	`// This intrinsic has no corresponding instruction.`
`2783`	`2779`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`2784`	`2780`	`pub unsafe fn _mm256_undefined_ps() -> __m256 {`
`2785`		`- _mm256_set1_ps(0.0)`
	`2781`	`+ const{ mem::zeroed()}`
`2786`	`2782`	`}`
`2787`	`2783`
`2788`	`2784`	/// Returns vector of type `__m256d` with indeterminate elements.
`@@ -2795,7 +2791,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {`
`2795`	`2791`	`// This intrinsic has no corresponding instruction.`
`2796`	`2792`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`2797`	`2793`	`pub unsafe fn _mm256_undefined_pd() -> __m256d {`
`2798`		`- _mm256_set1_pd(0.0)`
	`2794`	`+ const{ mem::zeroed()}`
`2799`	`2795`	`}`
`2800`	`2796`
`2801`	`2797`	`/// Returns vector of type __m256i with indeterminate elements.`
`@@ -2808,7 +2804,7 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d {`
`2808`	`2804`	`// This intrinsic has no corresponding instruction.`
`2809`	`2805`	`#[stable(feature = "simd_x86", since = "1.27.0")]`
`2810`	`2806`	`pub unsafe fn _mm256_undefined_si256() -> __m256i {`
`2811`		`- __m256i([0,0,0,0])`
	`2807`	`+ const{ mem::zeroed()}`
`2812`	`2808`	`}`
`2813`	`2809`
`2814`	`2810`	`/// Sets packed __m256 returned vector with the supplied values.`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 4d2911b

File tree

20 files changed

20 files changed

`‎library/stdarch/crates/core_arch/src/simd.rs‎`

`‎library/stdarch/crates/core_arch/src/wasm32/simd128.rs‎`

`‎library/stdarch/crates/core_arch/src/x86/avx.rs‎`

0 commit comments