Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4d2911b

Browse files
TDecking authored and Amanieu committed
Rework SIMD zeroing
1 parent 91c0dab commit 4d2911b

File tree

20 files changed

+1468
-2906
lines changed

20 files changed

+1468
-2906
lines changed

‎library/stdarch/crates/core_arch/src/simd.rs‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ macro_rules! simd_ty {
1010

1111
#[allow(clippy::use_self)]
1212
impl $id {
13+
/// A value of this type where all elements are zeroed out.
14+
pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
15+
1316
#[inline(always)]
1417
pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
1518
$id([$($param_name),*])

‎library/stdarch/crates/core_arch/src/wasm32/simd128.rs‎

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,7 +2232,7 @@ pub fn v128_any_true(a: v128) -> bool {
22322232
pub fn i8x16_abs(a: v128) -> v128 {
22332233
unsafe {
22342234
let a = a.as_i8x16();
2235-
let zero = simd::i8x16::splat(0);
2235+
let zero = simd::i8x16::ZERO;
22362236
simd_select::<simd::m8x16, simd::i8x16>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
22372237
}
22382238
}
@@ -2524,7 +2524,7 @@ pub use i16x8_extadd_pairwise_u8x16 as u16x8_extadd_pairwise_u8x16;
25242524
#[stable(feature = "wasm_simd", since = "1.54.0")]
25252525
pub fn i16x8_abs(a: v128) -> v128 {
25262526
let a = a.as_i16x8();
2527-
let zero = simd::i16x8::splat(0);
2527+
let zero = simd::i16x8::ZERO;
25282528
unsafe {
25292529
simd_select::<simd::m16x8, simd::i16x8>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
25302530
}
@@ -3012,7 +3012,7 @@ pub use i32x4_extadd_pairwise_u16x8 as u32x4_extadd_pairwise_u16x8;
30123012
#[stable(feature = "wasm_simd", since = "1.54.0")]
30133013
pub fn i32x4_abs(a: v128) -> v128 {
30143014
let a = a.as_i32x4();
3015-
let zero = simd::i32x4::splat(0);
3015+
let zero = simd::i32x4::ZERO;
30163016
unsafe {
30173017
simd_select::<simd::m32x4, simd::i32x4>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
30183018
}
@@ -3394,7 +3394,7 @@ pub use i32x4_extmul_high_u16x8 as u32x4_extmul_high_u16x8;
33943394
#[stable(feature = "wasm_simd", since = "1.54.0")]
33953395
pub fn i64x2_abs(a: v128) -> v128 {
33963396
let a = a.as_i64x2();
3397-
let zero = simd::i64x2::splat(0);
3397+
let zero = simd::i64x2::ZERO;
33983398
unsafe {
33993399
simd_select::<simd::m64x2, simd::i64x2>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
34003400
}
@@ -4105,7 +4105,7 @@ pub fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
41054105
let ret: simd::i32x4 = unsafe {
41064106
simd_shuffle!(
41074107
llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()),
4108-
simd::i32x2::splat(0),
4108+
simd::i32x2::ZERO,
41094109
[0, 1, 2, 3],
41104110
)
41114111
};
@@ -4129,7 +4129,7 @@ pub fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
41294129
let ret: simd::i32x4 = unsafe {
41304130
simd_shuffle!(
41314131
llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()),
4132-
simd::i32x2::splat(0),
4132+
simd::i32x2::ZERO,
41334133
[0, 1, 2, 3],
41344134
)
41354135
};
@@ -4176,7 +4176,7 @@ pub fn f32x4_demote_f64x2_zero(a: v128) -> v128 {
41764176
unsafe {
41774177
simd_cast::<simd::f64x4, simd::f32x4>(simd_shuffle!(
41784178
a.as_f64x2(),
4179-
simd::f64x2::splat(0.0),
4179+
simd::f64x2::ZERO,
41804180
[0, 1, 2, 3]
41814181
))
41824182
.v128()

‎library/stdarch/crates/core_arch/src/x86/avx.rs‎

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
515515
#[cfg_attr(test, assert_instr(vblendvpd))]
516516
#[stable(feature = "simd_x86", since = "1.27.0")]
517517
pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
518-
let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::splat(0));
518+
let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
519519
transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
520520
}
521521

@@ -528,7 +528,7 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
528528
#[cfg_attr(test, assert_instr(vblendvps))]
529529
#[stable(feature = "simd_x86", since = "1.27.0")]
530530
pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
531-
let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));
531+
let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
532532
transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
533533
}
534534

@@ -983,11 +983,7 @@ pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
983983
#[stable(feature = "simd_x86", since = "1.27.0")]
984984
pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
985985
static_assert_uimm_bits!(IMM1, 1);
986-
let dst: i64x2 = simd_shuffle!(
987-
a.as_i64x4(),
988-
_mm256_undefined_si256().as_i64x4(),
989-
[[0, 1], [2, 3]][IMM1 as usize],
990-
);
986+
let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
991987
transmute(dst)
992988
}
993989

@@ -2139,7 +2135,7 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
21392135
pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
21402136
// Propagate the highest bit to the rest, because simd_bitmask
21412137
// requires all-1 or all-0.
2142-
let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));
2138+
let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
21432139
simd_bitmask::<i64x4, u8>(mask).into()
21442140
}
21452141

@@ -2155,7 +2151,7 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
21552151
pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
21562152
// Propagate the highest bit to the rest, because simd_bitmask
21572153
// requires all-1 or all-0.
2158-
let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));
2154+
let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
21592155
simd_bitmask::<i32x8, u8>(mask).into()
21602156
}
21612157

@@ -2167,7 +2163,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
21672163
#[cfg_attr(test, assert_instr(vxorp))]
21682164
#[stable(feature = "simd_x86", since = "1.27.0")]
21692165
pub unsafe fn _mm256_setzero_pd() -> __m256d {
2170-
_mm256_set1_pd(0.0)
2166+
const{ mem::zeroed()}
21712167
}
21722168

21732169
/// Returns vector of type __m256 with all elements set to zero.
@@ -2178,7 +2174,7 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d {
21782174
#[cfg_attr(test, assert_instr(vxorps))]
21792175
#[stable(feature = "simd_x86", since = "1.27.0")]
21802176
pub unsafe fn _mm256_setzero_ps() -> __m256 {
2181-
_mm256_set1_ps(0.0)
2177+
const{ mem::zeroed()}
21822178
}
21832179

21842180
/// Returns vector of type __m256i with all elements set to zero.
@@ -2189,7 +2185,7 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 {
21892185
#[cfg_attr(test, assert_instr(vxor))]
21902186
#[stable(feature = "simd_x86", since = "1.27.0")]
21912187
pub unsafe fn _mm256_setzero_si256() -> __m256i {
2192-
_mm256_set1_epi8(0)
2188+
const{ mem::zeroed()}
21932189
}
21942190

21952191
/// Sets packed double-precision (64-bit) floating-point elements in returned
@@ -2722,7 +2718,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
27222718
#[stable(feature = "simd_x86", since = "1.27.0")]
27232719
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
27242720
let a = a.as_i64x2();
2725-
let undefined = _mm_undefined_si128().as_i64x2();
2721+
let undefined = i64x2::ZERO;
27262722
let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
27272723
transmute(dst)
27282724
}
@@ -2752,7 +2748,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
27522748
// instructions, thus it has zero latency.
27532749
#[stable(feature = "simd_x86", since = "1.27.0")]
27542750
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2755-
let b = _mm_setzero_si128().as_i64x2();
2751+
let b = i64x2::ZERO;
27562752
let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
27572753
transmute(dst)
27582754
}
@@ -2782,7 +2778,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
27822778
// This intrinsic has no corresponding instruction.
27832779
#[stable(feature = "simd_x86", since = "1.27.0")]
27842780
pub unsafe fn _mm256_undefined_ps() -> __m256 {
2785-
_mm256_set1_ps(0.0)
2781+
const{ mem::zeroed()}
27862782
}
27872783

27882784
/// Returns vector of type `__m256d` with indeterminate elements.
@@ -2795,7 +2791,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
27952791
// This intrinsic has no corresponding instruction.
27962792
#[stable(feature = "simd_x86", since = "1.27.0")]
27972793
pub unsafe fn _mm256_undefined_pd() -> __m256d {
2798-
_mm256_set1_pd(0.0)
2794+
const{ mem::zeroed()}
27992795
}
28002796

28012797
/// Returns vector of type __m256i with indeterminate elements.
@@ -2808,7 +2804,7 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d {
28082804
// This intrinsic has no corresponding instruction.
28092805
#[stable(feature = "simd_x86", since = "1.27.0")]
28102806
pub unsafe fn _mm256_undefined_si256() -> __m256i {
2811-
__m256i([0,0,0,0])
2807+
const{ mem::zeroed()}
28122808
}
28132809

28142810
/// Sets packed __m256 returned vector with the supplied values.

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /