Skip to content

Avoid memcpy in codegen for more types, notably Vec #112733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions compiler/rustc_codegen_llvm/src/type_of.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
// arrays but don't count as aggregate types
if let FieldsShape::Array { count, .. } = self.layout.fields()
&& let element = self.field(cx, 0)
&& element.ty.is_integral()
&& element.ty.is_primitive()
{
// `cx.type_ix(bits)` is tempting here, but while that works great
// for things that *stay* as memory-to-memory copies, it also ends
Expand All @@ -418,8 +418,36 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
return Some(cx.type_vector(ety, *count));
}

// FIXME: The above only handled integer arrays; surely more things
// would also be possible. Be careful about provenance, though!
None
// Ensure the type isn't too complex nor otherwise ineligible
is_scalar_copy_reasonable(4, self.ty, cx)?;

// Otherwise we can load/store it via a long-enough integer type
Some(cx.type_ix(self.layout.size().bits()))
}
}

fn is_scalar_copy_reasonable<'a, 'tcx>(
max_fields: u32,
t: Ty<'tcx>,
cx: &CodegenCx<'a, 'tcx>,
) -> Option<u32> {
if t.is_any_ptr() || t.is_primitive() {
return max_fields.checked_sub(1);
}

match t.kind() {
ty::Tuple(field_tys) => field_tys
.into_iter()
.try_fold(max_fields, |mf, tt| is_scalar_copy_reasonable(mf, tt, cx)),
// Unions are magic and can carry anything, regardless of their field
// types, so force them to always go through `memcpy`.
ty::Adt(adt_def, _) if adt_def.is_union() => None,
// If there could be multiple variants, just use `memcpy` for now.
ty::Adt(adt_def, _) if adt_def.variants().len() != 1 => None,
ty::Adt(adt_def, substs) => adt_def.all_fields().try_fold(max_fields, |mf, field_def| {
let field_ty = field_def.ty(cx.tcx, substs);
is_scalar_copy_reasonable(mf, field_ty, cx)
}),
_ => None,
}
}
2 changes: 1 addition & 1 deletion compiler/rustc_expand/src/proc_macro_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>>
b'$' => Dollar,
b'?' => Question,
b'\'' => SingleQuote,
_ => unreachable!(),
_ => unreachable!("{ch} not expected in Punct"),
};
smallvec![if joint {
tokenstream::TokenTree::token_joint(kind, span)
Expand Down
23 changes: 23 additions & 0 deletions tests/assembly/swap-strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// assembly-output: emit-asm
// compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel
// only-x86_64
// ignore-sgx
// ignore-debug

// Ensure that the swap uses SIMD registers and does not go to stack.

// CHECK-LABEL: swap_strings_xmm:
#[no_mangle]
pub fn swap_strings_xmm(a: &mut String, b: &mut String) {
// CHECK-DAG: movups [[A1:xmm.+]], xmmword ptr [[AX:.+]]
// CHECK-DAG: mov [[A2:r.+]], qword ptr [[AQ:.+]]
// CHECK-DAG: movups [[B1:xmm.+]], xmmword ptr [[BX:.+]]
// CHECK-DAG: mov [[B2:r.+]], qword ptr [[BQ:.+]]
// CHECK-NOT: mov
// CHECK-DAG: movups xmmword ptr [[AX]], [[B1]]
// CHECK-DAG: mov qword ptr [[AQ]], [[B2]]
// CHECK-DAG: movups xmmword ptr [[BX]], [[A1]]
// CHECK-DAG: mov qword ptr [[BQ]], [[A2]]
// CHECK: ret
std::mem::swap(a, b);
}
13 changes: 8 additions & 5 deletions tests/codegen/issues/issue-15953.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,32 @@
// Test that llvm generates `memcpy` for moving a value
// inside a function and moving an argument.

#[derive(Default, Debug)]
struct RatherLargeType(usize, isize, usize, isize, usize, isize);

struct Foo {
x: Vec<i32>,
x: RatherLargeType,
}

#[inline(never)]
#[no_mangle]
// CHECK: memcpy
fn interior(x: Vec<i32>) -> Vec<i32> {
fn interior(x: RatherLargeType) -> RatherLargeType {
let Foo { x } = Foo { x: x };
x
}

#[inline(never)]
#[no_mangle]
// CHECK: memcpy
fn exterior(x: Vec<i32>) -> Vec<i32> {
fn exterior(x: RatherLargeType) -> RatherLargeType {
x
}

fn main() {
let x = interior(Vec::new());
let x = interior(RatherLargeType::default());
println!("{:?}", x);

let x = exterior(Vec::new());
let x = exterior(RatherLargeType::default());
println!("{:?}", x);
}
25 changes: 17 additions & 8 deletions tests/codegen/issues/issue-86106.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
// compile-flags: -C opt-level=3 -Z merge-functions=disabled

// The below two functions ensure that both `String::new()` and `"".to_string()`
// produce the identical code.
// generate their values directly, rather that creating a constant and copying
// that constant (which takes more instructions because of PIC).

#![crate_type = "lib"]

// CHECK-LABEL: define {{(dso_local )?}}void @string_new
#[no_mangle]
pub fn string_new() -> String {
// CHECK: store ptr inttoptr
// CHECK: store {{i16|i32|i64}} 1, ptr %_0,
// CHECK-NEXT: getelementptr
// CHECK-NEXT: call void @llvm.memset
// CHECK-NEXT: ret void
Expand All @@ -19,10 +20,8 @@ pub fn string_new() -> String {
// CHECK-LABEL: define {{(dso_local )?}}void @empty_to_string
#[no_mangle]
pub fn empty_to_string() -> String {
// CHECK: store ptr inttoptr
// CHECK-NEXT: getelementptr
// CHECK-NEXT: call void @llvm.memset
// CHECK-NEXT: ret void
// CHECK: store {{i48|i96|i192}} 1, ptr %_0, align {{2|4|8}}
// CHECK-NEXT: ret
"".to_string()
}

Expand All @@ -32,7 +31,7 @@ pub fn empty_to_string() -> String {
// CHECK-LABEL: @empty_vec
#[no_mangle]
pub fn empty_vec() -> Vec<u8> {
// CHECK: store ptr inttoptr
// CHECK: store ptr inttoptr ({{i16|i32|i64}} 1 to ptr), ptr %_0,
// CHECK-NEXT: getelementptr
// CHECK-NEXT: call void @llvm.memset
// CHECK-NEXT: ret void
Expand All @@ -42,9 +41,19 @@ pub fn empty_vec() -> Vec<u8> {
// CHECK-LABEL: @empty_vec_clone
#[no_mangle]
pub fn empty_vec_clone() -> Vec<u8> {
// CHECK: store ptr inttoptr
// CHECK: store {{i16|i32|i64}} 1, ptr %_0,
// CHECK-NEXT: getelementptr
// CHECK-NEXT: call void @llvm.memset
// CHECK-NEXT: ret void
vec![].clone()
}

// CHECK-LABEL: @empty_vec_from_array
#[no_mangle]
pub fn empty_vec_from_array() -> Vec<u8> {
// CHECK: store ptr inttoptr ({{i16|i32|i64}} 1 to ptr), ptr %_0,
// CHECK-NEXT: getelementptr
// CHECK-NEXT: call void @llvm.memset
// CHECK-NEXT: ret void
[].into()
}
14 changes: 12 additions & 2 deletions tests/codegen/loads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,22 @@ pub fn small_array_alignment(x: [i8; 4]) -> [i8; 4] {
x
}

// CHECK-LABEL: small_struct_alignment
// CHECK-LABEL: i32 @small_struct_alignment(i32 %0)
// The struct is loaded as i32, but its alignment is lower, go with 1 byte to avoid target
// dependent alignment
#[no_mangle]
pub fn small_struct_alignment(x: Bytes) -> Bytes {
// CHECK: [[VAR:%[0-9]+]] = load i32, ptr %{{.*}}, align 1
// CHECK: [[RETP:%.+]] = alloca %Bytes, align 1
// CHECK: [[ALIGNED:%.+]] = alloca i32, align 4
// CHECK: %x = alloca %Bytes, align 1

// CHECK: store i32 %0, ptr [[ALIGNED]], align 4
// CHECK: call void @llvm.memcpy{{.+}}(ptr align 1 %x, ptr align 4 %1, i64 4, i1 false)

// CHECK: [[TEMP:%[0-9]+]] = load i32, ptr %x, align 1
// CHECK: store i32 [[TEMP]], ptr [[RETP]], align 1

// CHECK: [[VAR:%[0-9]+]] = load i32, ptr [[RETP]], align 1
// CHECK: ret i32 [[VAR]]
x
}
Expand Down
15 changes: 13 additions & 2 deletions tests/codegen/mem-replace-simple-type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,19 @@ pub fn replace_ref_str<'a>(r: &mut &'a str, v: &'a str) -> &'a str {
pub fn replace_short_array(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] {
// CHECK-NOT: alloca
// CHECK: %[[R:.+]] = load <3 x i32>, ptr %r, align 4
// CHECK: store <3 x i32> %[[R]], ptr %result
// CHECK: store <3 x i32> %[[R]], ptr %result, align 4
// CHECK: %[[V:.+]] = load <3 x i32>, ptr %v, align 4
// CHECK: store <3 x i32> %[[V]], ptr %r
// CHECK: store <3 x i32> %[[V]], ptr %r, align 4
std::mem::replace(r, v)
}

#[no_mangle]
// CHECK-LABEL: @replace_string(
pub fn replace_string(r: &mut String, v: String) -> String {
// CHECK-NOT: alloca
// CHECK: %[[R:.+]] = load i192, ptr %r, align 8
// CHECK: store i192 %[[R]], ptr %result, align 8
// CHECK: %[[V:.+]] = load i192, ptr %v, align 8
// CHECK: store i192 %[[V]], ptr %r, align 8
std::mem::replace(r, v)
}
24 changes: 20 additions & 4 deletions tests/codegen/packed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,22 @@ pub struct Packed2Pair(u8, u32);
// CHECK-LABEL: @pkd1_pair
#[no_mangle]
pub fn pkd1_pair(pair1: &mut Packed1Pair, pair2: &mut Packed1Pair) {
// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 1 %{{.*}}, ptr align 1 %{{.*}}, i{{[0-9]+}} 5, i1 false)
// CHECK: [[ALLOCA:%.+]] = alloca %Packed1Pair, align 1
// CHECK: [[TEMP1:%.+]] = load i40, ptr %pair1, align 1
// CHECK: store i40 [[TEMP1]], ptr [[ALLOCA]], align 1
// CHECK: [[TEMP2:%.+]] = load i40, ptr [[ALLOCA]], align 1
// CHECK: store i40 [[TEMP2]], ptr %pair2, align 1
*pair2 = *pair1;
}

// CHECK-LABEL: @pkd2_pair
#[no_mangle]
pub fn pkd2_pair(pair1: &mut Packed2Pair, pair2: &mut Packed2Pair) {
// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 2 %{{.*}}, ptr align 2 %{{.*}}, i{{[0-9]+}} 6, i1 false)
// CHECK: [[ALLOCA:%.+]] = alloca %Packed2Pair, align 2
// CHECK: [[TEMP1:%.+]] = load i48, ptr %pair1, align 2
// CHECK: store i48 [[TEMP1]], ptr [[ALLOCA]], align 2
// CHECK: [[TEMP2:%.+]] = load i48, ptr [[ALLOCA]], align 2
// CHECK: store i48 [[TEMP2]], ptr %pair2, align 2
*pair2 = *pair1;
}

Expand All @@ -141,13 +149,21 @@ pub struct Packed2NestedPair((u32, u32));
// CHECK-LABEL: @pkd1_nested_pair
#[no_mangle]
pub fn pkd1_nested_pair(pair1: &mut Packed1NestedPair, pair2: &mut Packed1NestedPair) {
// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 1 %{{.*}}, ptr align 1 %{{.*}}, i{{[0-9]+}} 8, i1 false)
// CHECK: [[ALLOCA:%.+]] = alloca %Packed1NestedPair, align 1
// CHECK: [[TEMP1:%.+]] = load i64, ptr %pair1, align 1
// CHECK: store i64 [[TEMP1]], ptr [[ALLOCA]], align 1
// CHECK: [[TEMP2:%.+]] = load i64, ptr [[ALLOCA]], align 1
// CHECK: store i64 [[TEMP2]], ptr %pair2, align 1
*pair2 = *pair1;
}

// CHECK-LABEL: @pkd2_nested_pair
#[no_mangle]
pub fn pkd2_nested_pair(pair1: &mut Packed2NestedPair, pair2: &mut Packed2NestedPair) {
// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 2 %{{.*}}, ptr align 2 %{{.*}}, i{{[0-9]+}} 8, i1 false)
// CHECK: [[ALLOCA:%.+]] = alloca %Packed2NestedPair, align 2
// CHECK: [[TEMP1:%.+]] = load i64, ptr %pair1, align 2
// CHECK: store i64 [[TEMP1]], ptr [[ALLOCA]], align 2
// CHECK: [[TEMP2:%.+]] = load i64, ptr [[ALLOCA]], align 2
// CHECK: store i64 [[TEMP2]], ptr %pair2, align 2
*pair2 = *pair1;
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ pub fn vector_align() -> usize {
// CHECK-LABEL: @build_array_s
#[no_mangle]
pub fn build_array_s(x: [f32; 4]) -> S<4> {
// CHECK: call void @llvm.memcpy.{{.+}}({{.*}} align [[VECTOR_ALIGN]] {{.*}} align [[ARRAY_ALIGN]] {{.*}}, [[USIZE]] 16, i1 false)
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
S::<4>(x)
}

Expand All @@ -50,7 +51,8 @@ pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> {
// CHECK-LABEL: @build_array_t
#[no_mangle]
pub fn build_array_t(x: [f32; 4]) -> T {
// CHECK: call void @llvm.memcpy.{{.+}}({{.*}} align [[VECTOR_ALIGN]] {{.*}} align [[ARRAY_ALIGN]] {{.*}}, [[USIZE]] 16, i1 false)
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
T(x)
}

Expand Down
70 changes: 61 additions & 9 deletions tests/codegen/swap-small-types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,21 @@

use std::mem::swap;

// CHECK-LABEL: @swap_fat_ptrs
#[no_mangle]
pub fn swap_fat_ptrs<'a>(x: &mut &'a str, y: &mut &'a str) {
// CHECK-NOT: alloca
// CHECK: %[[X0:.+]] = load ptr, ptr %x, align 8
// CHECK: %[[X1:.+]] = load i64, ptr %[[PX1:.+]], align 8
// CHECK: %[[Y0:.+]] = load ptr, ptr %y, align 8
// CHECK: %[[Y1:.+]] = load i64, ptr %[[PY1:.+]], align 8
// CHECK: store ptr %[[Y0]], ptr %x, align 8
// CHECK: store i64 %[[Y1]], ptr %[[PX1]], align 8
// CHECK: store ptr %[[X0]], ptr %y, align 8
// CHECK: store i64 %[[X1]], ptr %[[PY1]], align 8
swap(x, y)
}

type RGB48 = [u16; 3];

// CHECK-LABEL: @swap_rgb48_manually(
Expand Down Expand Up @@ -40,9 +55,9 @@ type RGB24 = [u8; 3];
// CHECK-LABEL: @swap_rgb24_slices
#[no_mangle]
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i8>
// CHECK: store <{{[0-9]+}} x i8>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i8>
// CHECK: store <{{[0-9]+}} x i8>
if x.len() == y.len() {
x.swap_with_slice(y);
}
Expand All @@ -51,12 +66,23 @@ pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
// This one has a power-of-two size, so we iterate over it directly
type RGBA32 = [u8; 4];

// CHECK-LABEL: @swap_rgba32
#[no_mangle]
pub fn swap_rgba32(x: &mut RGBA32, y: &mut RGBA32) {
// CHECK-NOT: alloca
// CHECK: load <4 x i8>
// CHECK: load <4 x i8>
// CHECK: store <4 x i8>
// CHECK: store <4 x i8>
swap(x, y)
}

// CHECK-LABEL: @swap_rgba32_slices
#[no_mangle]
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i32>
// CHECK: store <{{[0-9]+}} x i32>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i32>
// CHECK: store <{{[0-9]+}} x i32>
if x.len() == y.len() {
x.swap_with_slice(y);
}
Expand All @@ -69,10 +95,36 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
// CHECK-LABEL: @swap_string_slices
#[no_mangle]
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i64>
// CHECK: store <{{[0-9]+}} x i64>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i64>
// CHECK: store <{{[0-9]+}} x i64>
if x.len() == y.len() {
x.swap_with_slice(y);
}
}

// It's wasteful to do three `memcpy`s when a `String` is just three fields.

// CHECK-LABEL: @swap_strings
#[no_mangle]
pub fn swap_strings(x: &mut String, y: &mut String) {
// CHECK-NOT: alloca
// CHECK: load i192
// CHECK: load i192
// CHECK: store i192
// CHECK: store i192
swap(x, y)
}

// CHECK-LABEL: @swap_tuple_with_padding
#[no_mangle]
pub fn swap_tuple_with_padding(x: &mut (u8, u32, u8), y: &mut (u8, u32, u8)) {
// CHECK-NOT: alloca
// CHECK: load i64
// CHECK-NOT: noundef
// CHECK: load i64
// CHECK-NOT: noundef
// CHECK: store i64
// CHECK: store i64
swap(x, y)
}
Loading