
Commit b36fbc7
Return values larger than 2 registers using a return area pointer
LLVM and Cranelift disagree about how to return values that don't fit in the registers designated for return values. LLVM forces the entire return value to be passed via a return area pointer, while Cranelift looks at each IR-level return value independently and decides whether to pass it in a register, which can result in the return value being passed partially in registers and partially through a return area pointer.

While Cranelift may need to be fixed, as the LLVM behavior is generally more correct with respect to the surface language, forcing this behavior in rustc itself makes it easier for other backends to conform to the Rust ABI, and for the C ABI rustc already handles this behavior anyway.

In addition, LLVM's decision to pass the return value in registers or through a return area pointer depends on how exactly the return type is lowered to an LLVM IR type. For example, `Option<u128>` can be lowered as `{ i128, i128 }`, in which case the x86_64 backend would use a return area pointer, or as `{ i32, i128 }`, in which case the x86_64 backend would pass it in registers by taking advantage of an LLVM ABI extension that allows using 3 registers for the x86_64 SysV calling convention rather than the officially specified 2 registers.

This adjustment is only necessary for the Rust ABI: for other ABIs, the calling convention implementations in rustc_target already ensure that any return value which doesn't fit in the available number of return registers is passed in the right way for the current target.
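To make the `Option<u128>` example concrete: on x86_64 a value of that type occupies 32 bytes, which exceeds the two 8-byte return registers, so after this change it is always returned through a return area pointer. A minimal sketch (the function name `make` is illustrative, not part of the commit, and the attribute list is abbreviated):

```rust
// Illustration only: `Option<u128>` is 32 bytes on x86_64 (a 16-byte
// aligned u128 payload plus a discriminant slot padded to 16 bytes).
#[no_mangle]
pub fn make(x: u128) -> Option<u128> {
    // With this commit the LLVM signature becomes roughly
    //   define void @make(ptr sret([32 x i8]) align 16 %_0, i128 %x)
    // instead of returning `{ i32, i128 }` or `{ i128, i128 }` directly.
    Some(x)
}
```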
1 parent f7c8928 commit b36fbc7

3 files changed (+60 -27 lines)

compiler/rustc_ty_utils/src/abi.rs (+43)

```diff
@@ -726,6 +726,49 @@ fn fn_abi_adjust_for_abi<'tcx>(
             };
         }
 
+        if arg_idx.is_none() && arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2 {
+            // Return values larger than 2 registers using a return area
+            // pointer. LLVM and Cranelift disagree about how to return
+            // values that don't fit in the registers designated for return
+            // values. LLVM will force the entire return value to be passed
+            // by return area pointer, while Cranelift will look at each IR level
+            // return value independently and decide to pass it in a
+            // register or not, which would result in the return value
+            // being passed partially in registers and partially through a
+            // return area pointer.
+            //
+            // While Cranelift may need to be fixed, as the LLVM behavior is
+            // generally more correct with respect to the surface language,
+            // forcing this behavior in rustc itself makes it easier for
+            // other backends to conform to the Rust ABI, and for the C ABI
+            // rustc already handles this behavior anyway.
+            //
+            // In addition, LLVM's decision to pass the return value in
+            // registers or using a return area pointer depends on how
+            // exactly the return type is lowered to an LLVM IR type. For
+            // example `Option<u128>` can be lowered as `{ i128, i128 }`,
+            // in which case the x86_64 backend would use a return area
+            // pointer, or it could be passed as `{ i32, i128 }`, in which
+            // case the x86_64 backend would pass it in registers by taking
+            // advantage of an LLVM ABI extension that allows using 3
+            // registers for the x86_64 sysv call conv rather than the
+            // officially specified 2 registers.
+            //
+            // FIXME: Technically we should look at the number of available
+            // return registers rather than guessing that there are 2
+            // registers for return values. In practice only a couple of
+            // architectures have fewer than 2 return registers, none of
+            // which are supported by Cranelift.
+            //
+            // NOTE: This adjustment is only necessary for the Rust ABI, as
+            // for other ABIs the calling convention implementations in
+            // rustc_target already ensure any return value which doesn't
+            // fit in the available number of return registers is passed in
+            // the right way for the current target.
+            arg.make_indirect();
+            return;
+        }
+
         match arg.layout.abi {
             Abi::Aggregate { .. } => {}
 
```
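Stripped of rustc's internals, the new check compares the return value's size against two data pointers. Below is a standalone sketch of that logic, assuming a 64-bit target where `Pointer(AddressSpace::DATA).size(cx)` is 8 bytes; the helper name `use_return_area_pointer` is hypothetical, not a rustc API:

```rust
// Minimal sketch of the condition added above: only return values take
// this path (`arg_idx.is_none()` in the real code), and anything larger
// than two pointer-sized registers is made indirect, i.e. returned
// through a caller-provided return area pointer.
fn use_return_area_pointer(return_value_size: u64, pointer_size: u64) -> bool {
    return_value_size > pointer_size * 2
}

fn main() {
    // 16 bytes still fits in two 8-byte registers...
    assert!(!use_return_area_pointer(16, 8));
    // ...but a 32-byte value such as `Option<u128>` does not.
    assert!(use_return_area_pointer(32, 8));
}
```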
tests/codegen/i128-x86-align.rs (+13 -23)

```diff
@@ -18,14 +18,14 @@ pub struct ScalarPair {
 
 #[no_mangle]
 pub fn load(x: &ScalarPair) -> ScalarPair {
-    // CHECK-LABEL: @load(
-    // CHECK-SAME: align 16 dereferenceable(32) %x
+    // CHECK-LABEL: define void @load(ptr dead_on_unwind noalias nocapture noundef writable sret([32 x i8]) align 16 dereferenceable(32) %_0, ptr noalias noundef readonly align 16 dereferenceable(32) %x)
     // CHECK: [[A:%.*]] = load i32, ptr %x, align 16
-    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr %x, i64 16
+    // CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %x, i64 16
     // CHECK-NEXT: [[B:%.*]] = load i128, ptr [[GEP]], align 16
-    // CHECK-NEXT: [[IV1:%.*]] = insertvalue { i32, i128 } poison, i32 [[A]], 0
-    // CHECK-NEXT: [[IV2:%.*]] = insertvalue { i32, i128 } [[IV1]], i128 [[B]], 1
-    // CHECK-NEXT: ret { i32, i128 } [[IV2]]
+    // CHECK-NEXT: store i32 [[A]], ptr %_0, align 16
+    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr %_0, i64 16
+    // CHECK-NEXT: store i128 [[B]], ptr [[GEP]], align 16
+    // CHECK-NEXT: ret void
     *x
 }
 
@@ -52,30 +52,20 @@ pub fn alloca() {
 
 #[no_mangle]
 pub fn load_volatile(x: &ScalarPair) -> ScalarPair {
-    // CHECK-LABEL: @load_volatile(
-    // CHECK-SAME: align 16 dereferenceable(32) %x
-    // CHECK: [[TMP:%.*]] = alloca [32 x i8], align 16
+    // CHECK-LABEL: define void @load_volatile(ptr dead_on_unwind noalias nocapture noundef writable sret([32 x i8]) align 16 dereferenceable(32) %_0, ptr noalias noundef readonly align 16 dereferenceable(32) %x)
     // CHECK: [[LOAD:%.*]] = load volatile %ScalarPair, ptr %x, align 16
-    // CHECK-NEXT: store %ScalarPair [[LOAD]], ptr [[TMP]], align 16
-    // CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP]], align 16
-    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
-    // CHECK-NEXT: [[B:%.*]] = load i128, ptr [[GEP]], align 16
+    // CHECK-NEXT: store %ScalarPair [[LOAD]], ptr %_0, align 16
+    // CHECK-NEXT: ret void
     unsafe { std::intrinsics::volatile_load(x) }
 }
 
 #[no_mangle]
 pub fn transmute(x: ScalarPair) -> (std::mem::MaybeUninit<i128>, i128) {
-    // CHECK-LABEL: define { i128, i128 } @transmute(i32 noundef %x.0, i128 noundef %x.1)
-    // CHECK: [[TMP:%.*]] = alloca [32 x i8], align 16
-    // CHECK-NEXT: store i32 %x.0, ptr [[TMP]], align 16
-    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
+    // CHECK-LABEL: define void @transmute(ptr dead_on_unwind noalias nocapture noundef writable sret([32 x i8]) align 16 dereferenceable(32) %_0, i32 noundef %x.0, i128 noundef %x.1)
+    // CHECK: store i32 %x.0, ptr %_0, align 16
+    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr %_0, i64 16
     // CHECK-NEXT: store i128 %x.1, ptr [[GEP]], align 16
-    // CHECK-NEXT: [[LOAD1:%.*]] = load i128, ptr %_0, align 16
-    // CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
-    // CHECK-NEXT: [[LOAD2:%.*]] = load i128, ptr [[GEP2]], align 16
-    // CHECK-NEXT: [[IV1:%.*]] = insertvalue { i128, i128 } poison, i128 [[LOAD1]], 0
-    // CHECK-NEXT: [[IV2:%.*]] = insertvalue { i128, i128 } [[IV1]], i128 [[LOAD2]], 1
-    // CHECK-NEXT: ret { i128, i128 } [[IV2]]
+    // CHECK-NEXT: ret void
     unsafe { std::mem::transmute(x) }
 }
 
```
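The `ScalarPair` used by this test is defined above the hunk shown; here is a hedged reconstruction of why it now goes through `sret` (the field names are guesses, the layout facts are what the test relies on):

```rust
// On x86_64, where rustc aligns i128 to 16 bytes, this struct is
// 32 bytes with 16-byte alignment -- larger than two 8-byte return
// registers, hence the `sret([32 x i8]) align 16` return seen above.
pub struct ScalarPair {
    pub a: i32,
    pub b: i128,
}

fn main() {
    assert_eq!(std::mem::size_of::<ScalarPair>(), 32);
    assert_eq!(std::mem::align_of::<ScalarPair>(), 16);
}
```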

tests/codegen/tuple-layout-opt.rs (+4 -4)

```diff
@@ -19,28 +19,28 @@ pub fn test_ScalarZstFirst(_: ScalarZstFirst) -> ScalarZstFirst {
 }
 
 type ScalarPairZstLast = (u8, u128, ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairZstLast(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairZstLast(ptr sret([32 x i8]) align 16 %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairZstLast(_: ScalarPairZstLast) -> ScalarPairZstLast {
     loop {}
 }
 
 type ScalarPairZstFirst = ((), u8, u128);
-// CHECK: define {{(dso_local )?}}{ i8, i128 } @test_ScalarPairZstFirst(i8 %_1.0, i128 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairZstFirst(ptr sret([32 x i8]) align 16 %_0, i8 %_1.0, i128 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairZstFirst(_: ScalarPairZstFirst) -> ScalarPairZstFirst {
     loop {}
 }
 
 type ScalarPairLotsOfZsts = ((), u8, (), u128, ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairLotsOfZsts(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairLotsOfZsts(ptr sret([32 x i8]) align 16 %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairLotsOfZsts(_: ScalarPairLotsOfZsts) -> ScalarPairLotsOfZsts {
     loop {}
 }
 
 type ScalarPairLottaNesting = (((), ((), u8, (), u128, ())), ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairLottaNesting(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairLottaNesting(ptr sret([32 x i8]) align 16 %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairLottaNesting(_: ScalarPairLottaNesting) -> ScalarPairLottaNesting {
     loop {}
```
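All four tuple types collapse to the same optimized layout, a `u128` plus a `u8` padded to 32 bytes, which is why every signature changes the same way. A quick check, independent of the test harness, that holds on x86_64 where `u128` has 16-byte alignment:

```rust
// Each tuple is 32 bytes: 16 for the u128, 1 for the u8, padded out to
// the 16-byte alignment of u128. Zero-sized members add nothing.
fn main() {
    assert_eq!(std::mem::size_of::<(u8, u128, ())>(), 32);
    assert_eq!(std::mem::size_of::<((), u8, u128)>(), 32);
    assert_eq!(std::mem::size_of::<((), u8, (), u128, ())>(), 32);
    assert_eq!(std::mem::size_of::<(((), ((), u8, (), u128, ())), ())>(), 32);
}
```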
