Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 120cb65

Browse files
committed
Remove my scalar_copy_backend_type optimization attempt
I added this back in #111999, but I no longer think it's a good idea:
- It had to get scaled back to only power-of-two things to not break a bunch of targets
- LLVM seems to be getting better at memcpy removal anyway
- Introducing vector instructions has seemed to sometimes (#115515) make autovectorization worse

So this removes it from the codegen crates entirely, and instead just tries to use <https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/traits/builder/trait.BuilderMethods.html#method.typed_place_copy> instead of direct `memcpy` so things will still use load/store for immediates.
1 parent 548e14b commit 120cb65

File tree

9 files changed

+40
-142
lines changed

9 files changed

+40
-142
lines changed

‎compiler/rustc_codegen_llvm/src/type_.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -281,9 +281,6 @@ impl<'ll, 'tcx> LayoutTypeMethods<'tcx> for CodegenCx<'ll, 'tcx> {
281281
fn reg_backend_type(&self, ty: &Reg) -> &'ll Type {
282282
ty.llvm_type(self)
283283
}
284-
fn scalar_copy_backend_type(&self, layout: TyAndLayout<'tcx>) -> Option<Self::Type> {
285-
layout.scalar_copy_llvm_type(self)
286-
}
287284
}
288285

289286
impl<'ll, 'tcx> TypeMembershipMethods<'tcx> for CodegenCx<'ll, 'tcx> {

‎compiler/rustc_codegen_llvm/src/type_of.rs

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use rustc_middle::bug;
55
use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
66
use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths};
77
use rustc_middle::ty::{self, Ty, TypeVisitableExt};
8-
use rustc_target::abi::HasDataLayout;
98
use rustc_target::abi::{Abi, Align, FieldsShape};
109
use rustc_target::abi::{Int, Pointer, F128, F16, F32, F64};
1110
use rustc_target::abi::{Scalar, Size, Variants};
@@ -166,7 +165,6 @@ pub trait LayoutLlvmExt<'tcx> {
166165
index: usize,
167166
immediate: bool,
168167
) -> &'a Type;
169-
fn scalar_copy_llvm_type<'a>(&self, cx: &CodegenCx<'a, 'tcx>) -> Option<&'a Type>;
170168
}
171169

172170
impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
@@ -308,44 +306,4 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
308306

309307
self.scalar_llvm_type_at(cx, scalar)
310308
}
311-
312-
fn scalar_copy_llvm_type<'a>(&self, cx: &CodegenCx<'a, 'tcx>) -> Option<&'a Type> {
313-
debug_assert!(self.is_sized());
314-
315-
// FIXME: this is a fairly arbitrary choice, but 128 bits on WASM
316-
// (matching the 128-bit SIMD types proposal) and 256 bits on x64
317-
// (like AVX2 registers) seems at least like a tolerable starting point.
318-
let threshold = cx.data_layout().pointer_size * 4;
319-
if self.layout.size() > threshold {
320-
return None;
321-
}
322-
323-
// Vectors, even for non-power-of-two sizes, have the same layout as
324-
// arrays but don't count as aggregate types
325-
// While LLVM theoretically supports non-power-of-two sizes, and they
326-
// often work fine, sometimes x86-isel deals with them horribly
327-
// (see #115212) so for now only use power-of-two ones.
328-
if let FieldsShape::Array { count, .. } = self.layout.fields()
329-
&& count.is_power_of_two()
330-
&& let element = self.field(cx, 0)
331-
&& element.ty.is_integral()
332-
{
333-
// `cx.type_ix(bits)` is tempting here, but while that works great
334-
// for things that *stay* as memory-to-memory copies, it also ends
335-
// up suppressing vectorization as it introduces shifts when it
336-
// extracts all the individual values.
337-
338-
let ety = element.llvm_type(cx);
339-
if *count == 1 {
340-
// Emitting `<1 x T>` would be silly; just use the scalar.
341-
return Some(ety);
342-
} else {
343-
return Some(cx.type_vector(ety, *count));
344-
}
345-
}
346-
347-
// FIXME: The above only handled integer arrays; surely more things
348-
// would also be possible. Be careful about provenance, though!
349-
None
350-
}
351309
}

‎compiler/rustc_codegen_ssa/src/base.rs

Lines changed: 3 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use crate::mir;
1212
use crate::mir::operand::OperandValue;
1313
use crate::mir::place::PlaceRef;
1414
use crate::traits::*;
15-
use crate::{CachedModuleCodegen, CompiledModule, CrateInfo, MemFlags,ModuleCodegen, ModuleKind};
15+
use crate::{CachedModuleCodegen, CompiledModule, CrateInfo, ModuleCodegen, ModuleKind};
1616

1717
use rustc_ast::expand::allocator::{global_fn_name, AllocatorKind, ALLOCATOR_METHODS};
1818
use rustc_attr as attr;
@@ -37,7 +37,7 @@ use rustc_session::config::{self, CrateType, EntryFnType, OutputType};
3737
use rustc_session::Session;
3838
use rustc_span::symbol::sym;
3939
use rustc_span::Symbol;
40-
use rustc_target::abi::{Align,FIRST_VARIANT};
40+
use rustc_target::abi::FIRST_VARIANT;
4141

4242
use std::cmp;
4343
use std::collections::BTreeSet;
@@ -282,15 +282,7 @@ pub fn coerce_unsized_into<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
282282
}
283283

284284
if src_f.layout.ty == dst_f.layout.ty {
285-
memcpy_ty(
286-
bx,
287-
dst_f.llval,
288-
dst_f.align,
289-
src_f.llval,
290-
src_f.align,
291-
src_f.layout,
292-
MemFlags::empty(),
293-
);
285+
bx.typed_place_copy(dst_f, src_f);
294286
} else {
295287
coerce_unsized_into(bx, src_f, dst_f);
296288
}
@@ -355,30 +347,6 @@ pub fn wants_new_eh_instructions(sess: &Session) -> bool {
355347
wants_wasm_eh(sess) || wants_msvc_seh(sess)
356348
}
357349

358-
pub fn memcpy_ty<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
359-
bx: &mut Bx,
360-
dst: Bx::Value,
361-
dst_align: Align,
362-
src: Bx::Value,
363-
src_align: Align,
364-
layout: TyAndLayout<'tcx>,
365-
flags: MemFlags,
366-
) {
367-
let size = layout.size.bytes();
368-
if size == 0 {
369-
return;
370-
}
371-
372-
if flags == MemFlags::empty()
373-
&& let Some(bty) = bx.cx().scalar_copy_backend_type(layout)
374-
{
375-
let temp = bx.load(bty, src, src_align);
376-
bx.store(temp, dst, dst_align);
377-
} else {
378-
bx.memcpy(dst, dst_align, src, src_align, bx.cx().const_usize(size), flags);
379-
}
380-
}
381-
382350
pub fn codegen_instance<'a, 'tcx: 'a, Bx: BuilderMethods<'a, 'tcx>>(
383351
cx: &'a Bx::CodegenCx,
384352
instance: Instance<'tcx>,

‎compiler/rustc_codegen_ssa/src/mir/block.rs

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use crate::common::{self, IntPredicate};
88
use crate::errors::CompilerBuiltinsCannotCall;
99
use crate::meth;
1010
use crate::traits::*;
11-
use crate::MemFlags;
1211

1312
use rustc_ast as ast;
1413
use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
@@ -1454,7 +1453,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
14541453
}
14551454
_ => (op.immediate_or_packed_pair(bx), arg.layout.align.abi, false),
14561455
},
1457-
Ref(llval, _, align) => match arg.mode {
1456+
Ref(llval, llextra, align) => match arg.mode {
14581457
PassMode::Indirect { attrs, .. } => {
14591458
let required_align = match attrs.pointee_align {
14601459
Some(pointee_align) => cmp::max(pointee_align, arg.layout.align.abi),
@@ -1465,22 +1464,16 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
14651464
// alignment requirements may be higher than the type's alignment, so copy
14661465
// to a higher-aligned alloca.
14671466
let scratch = PlaceRef::alloca_aligned(bx, arg.layout, required_align);
1468-
base::memcpy_ty(
1469-
bx,
1470-
scratch.llval,
1471-
scratch.align,
1472-
llval,
1473-
align,
1474-
op.layout,
1475-
MemFlags::empty(),
1476-
);
1467+
let op_place = PlaceRef { llval, llextra, layout: op.layout, align };
1468+
bx.typed_place_copy(scratch, op_place);
14771469
(scratch.llval, scratch.align, true)
14781470
} else {
14791471
(llval, align, true)
14801472
}
14811473
}
14821474
_ => (llval, align, true),
14831475
},
1476+
14841477
ZeroSized => match arg.mode {
14851478
PassMode::Indirect { on_stack, .. } => {
14861479
if on_stack {

‎compiler/rustc_codegen_ssa/src/mir/operand.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use super::place::PlaceRef;
22
use super::{FunctionCx, LocalRef};
33

4-
use crate::base;
54
use crate::size_of_val;
65
use crate::traits::*;
76
use crate::MemFlags;
@@ -419,7 +418,8 @@ impl<'a, 'tcx, V: CodegenObject> OperandValue<V> {
419418
bx.store_with_flags(val, dest.llval, dest.align, flags);
420419
return;
421420
}
422-
base::memcpy_ty(bx, dest.llval, dest.align, r, source_align, dest.layout, flags)
421+
let size = bx.cx().const_usize(dest.layout.size.bytes());
422+
bx.memcpy(dest.llval, dest.align, r, source_align, size, flags);
423423
}
424424
OperandValue::Ref(_, Some(_), _) => {
425425
bug!("cannot directly store unsized values");

‎compiler/rustc_codegen_ssa/src/traits/type_.rs

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -133,28 +133,6 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
133133
|| self.is_backend_immediate(layout)
134134
|| self.is_backend_scalar_pair(layout))
135135
}
136-
137-
/// A type that can be used in a [`super::BuilderMethods::load`] +
138-
/// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
139-
/// such as a MIR `*_0 = *_1`.
140-
///
141-
/// It's always legal to return `None` here, as the provided impl does,
142-
/// in which case callers should use [`super::BuilderMethods::memcpy`]
143-
/// instead of the `load`+`store` pair.
144-
///
145-
/// This can be helpful for things like arrays, where the LLVM backend type
146-
/// `[3 x i16]` optimizes to three separate loads and stores, but it can
147-
/// instead be copied via an `i48` that stays as the single `load`+`store`.
148-
/// (As of 2023-05 LLVM cannot necessarily optimize away a `memcpy` in these
149-
/// cases, due to `poison` handling, but in codegen we have more information
150-
/// about the type invariants, so can emit something better instead.)
151-
///
152-
/// This *should* return `None` for particularly-large types, where leaving
153-
/// the `memcpy` may well be important to avoid code size explosion.
154-
fn scalar_copy_backend_type(&self, layout: TyAndLayout<'tcx>) -> Option<Self::Type> {
155-
let _ = layout;
156-
None
157-
}
158136
}
159137

160138
// For backends that support CFI using type membership (i.e., testing whether a given pointer is

‎tests/codegen/array-codegen.rs

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,52 +5,58 @@
55
// CHECK-LABEL: @array_load
66
#[no_mangle]
77
pub fn array_load(a: &[u8; 4]) -> [u8; 4] {
8-
// CHECK: %_0 = alloca [4 x i8], align 1
9-
// CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1
10-
// CHECK: store <4 x i8> %[[TEMP1]], ptr %_0, align 1
11-
// CHECK: %[[TEMP2:.+]] = load i32, ptr %_0, align 1
12-
// CHECK: ret i32 %[[TEMP2]]
8+
// CHECK-NOT: alloca
9+
// CHECK: %[[ALLOCA:.+]] = alloca [4 x i8], align 1
10+
// CHECK-NOT: alloca
11+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[ALLOCA]], ptr align 1 %a, {{.+}} 4, i1 false)
12+
// CHECK: %[[TEMP:.+]] = load i32, ptr %[[ALLOCA]], align 1
13+
// CHECK: ret i32 %[[TEMP]]
1314
*a
1415
}
1516

1617
// CHECK-LABEL: @array_store
1718
#[no_mangle]
1819
pub fn array_store(a: [u8; 4], p: &mut [u8; 4]) {
20+
// CHECK-NOT: alloca
21+
// CHECK: %[[TEMP:.+]] = alloca i32, [[TEMPALIGN:align [0-9]+]]
22+
// CHECK-NOT: alloca
1923
// CHECK: %a = alloca [4 x i8]
20-
// CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1
21-
// CHECK-NEXT: store <4 x i8> %[[TEMP]], ptr %p, align 1
24+
// CHECK-NOT: alloca
25+
// store i32 %0, ptr %[[TEMP]]
26+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %a, ptr [[TEMPALIGN]] %[[TEMP]], {{.+}} 4, i1 false)
27+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %a, {{.+}} 4, i1 false)
2228
*p = a;
2329
}
2430

2531
// CHECK-LABEL: @array_copy
2632
#[no_mangle]
2733
pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) {
34+
// CHECK-NOT: alloca
2835
// CHECK: %[[LOCAL:.+]] = alloca [4 x i8], align 1
29-
// CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1
30-
// CHECK: store <4 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1
31-
// CHECK: %[[TEMP2:.+]] = load <4 x i8>, ptr %[[LOCAL]], align 1
32-
// CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1
36+
// CHECK-NOT: alloca
37+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 4, i1 false)
38+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 4, i1 false)
3339
*p = *a;
3440
}
3541

3642
// CHECK-LABEL: @array_copy_1_element
3743
#[no_mangle]
3844
pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
45+
// CHECK-NOT: alloca
3946
// CHECK: %[[LOCAL:.+]] = alloca [1 x i8], align 1
40-
// CHECK: %[[TEMP1:.+]] = load i8, ptr %a, align 1
41-
// CHECK: store i8 %[[TEMP1]], ptr %[[LOCAL]], align 1
42-
// CHECK: %[[TEMP2:.+]] = load i8, ptr %[[LOCAL]], align 1
43-
// CHECK: store i8 %[[TEMP2]], ptr %p, align 1
47+
// CHECK-NOT: alloca
48+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 1, i1 false)
49+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 1, i1 false)
4450
*p = *a;
4551
}
4652

4753
// CHECK-LABEL: @array_copy_2_elements
4854
#[no_mangle]
4955
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
56+
// CHECK-NOT: alloca
5057
// CHECK: %[[LOCAL:.+]] = alloca [2 x i8], align 1
51-
// CHECK: %[[TEMP1:.+]] = load <2 x i8>, ptr %a, align 1
52-
// CHECK: store <2 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1
53-
// CHECK: %[[TEMP2:.+]] = load <2 x i8>, ptr %[[LOCAL]], align 1
54-
// CHECK: store <2 x i8> %[[TEMP2]], ptr %p, align 1
58+
// CHECK-NOT: alloca
59+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 2, i1 false)
60+
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 2, i1 false)
5561
*p = *a;
5662
}

‎tests/codegen/array-optimized.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
1616
#[no_mangle]
1717
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
1818
// CHECK-NOT: alloca
19-
// CHECK: %[[TEMP:.+]] = load <2 x i8>, ptr %a, align 1
20-
// CHECK: store <2 x i8> %[[TEMP]], ptr %p, align 1
19+
// CHECK: %[[TEMP:.+]] = load i16, ptr %a, align 1
20+
// CHECK: store i16 %[[TEMP]], ptr %p, align 1
2121
// CHECK: ret
2222
*p = *a;
2323
}
@@ -26,8 +26,8 @@ pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
2626
#[no_mangle]
2727
pub fn array_copy_4_elements(a: &[u8; 4], p: &mut [u8; 4]) {
2828
// CHECK-NOT: alloca
29-
// CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1
30-
// CHECK: store <4 x i8> %[[TEMP]], ptr %p, align 1
29+
// CHECK: %[[TEMP:.+]] = load i32, ptr %a, align 1
30+
// CHECK: store i32 %[[TEMP]], ptr %p, align 1
3131
// CHECK: ret
3232
*p = *a;
3333
}

‎tests/codegen/mem-replace-simple-type.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,7 @@ pub fn replace_short_array_3(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] {
4545
// CHECK-LABEL: @replace_short_array_4(
4646
pub fn replace_short_array_4(r: &mut [u32; 4], v: [u32; 4]) -> [u32; 4] {
4747
// CHECK-NOT: alloca
48-
// CHECK: %[[R:.+]] = load <4 x i32>, ptr %r, align 4
49-
// CHECK: store <4 x i32> %[[R]], ptr %result
50-
// CHECK: %[[V:.+]] = load <4 x i32>, ptr %v, align 4
51-
// CHECK: store <4 x i32> %[[V]], ptr %r
48+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %result, ptr align 4 %r, i64 16, i1 false)
49+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %r, ptr align 4 %v, i64 16, i1 false)
5250
std::mem::replace(r, v)
5351
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /