Skip to content

Commit b5b0a22

Browse files
authored
[X86][GlobalISel] Support StructRet arguments (#96629)
We follow the approach used by SelectionDAG and FastISel: record the StructRet argument's register while lowering formal arguments, then use that register during return lowering to insert a copy of the StructRet argument into RAX (EAX on 32-bit targets). We also add RAX/EAX as an operand of the RET instruction, which fixes a difference between GlobalISel and SelectionDAG in cases where the copy instruction could otherwise be deleted.
1 parent 23db37c commit b5b0a22

File tree

4 files changed

+72
-29
lines changed

4 files changed

+72
-29
lines changed

llvm/lib/Target/X86/GISel/X86CallLowering.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "X86CallingConv.h"
1717
#include "X86ISelLowering.h"
1818
#include "X86InstrInfo.h"
19+
#include "X86MachineFunctionInfo.h"
1920
#include "X86RegisterInfo.h"
2021
#include "X86Subtarget.h"
2122
#include "llvm/ADT/ArrayRef.h"
@@ -147,12 +148,17 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
147148
"Return value without a vreg");
148149
MachineFunction &MF = MIRBuilder.getMF();
149150
auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
150-
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
151-
bool Is64Bit = STI.is64Bit();
151+
auto FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
152+
const auto &STI = MF.getSubtarget<X86Subtarget>();
153+
Register RetReg = STI.is64Bit() ? X86::RAX : X86::EAX;
152154

153155
if (!FLI.CanLowerReturn) {
154156
insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
155-
MIRBuilder.buildCopy(Is64Bit ? X86::RAX : X86::EAX, FLI.DemoteRegister);
157+
MIRBuilder.buildCopy(RetReg, FLI.DemoteRegister);
158+
MIB.addReg(RetReg);
159+
} else if (Register Reg = FuncInfo->getSRetReturnReg()) {
160+
MIRBuilder.buildCopy(RetReg, Reg);
161+
MIB.addReg(RetReg);
156162
} else if (!VRegs.empty()) {
157163
const Function &F = MF.getFunction();
158164
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -258,6 +264,7 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
258264
MachineFunction &MF = MIRBuilder.getMF();
259265
MachineRegisterInfo &MRI = MF.getRegInfo();
260266
auto DL = MF.getDataLayout();
267+
auto FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
261268

262269
SmallVector<ArgInfo, 8> SplitArgs;
263270

@@ -273,12 +280,17 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
273280
// TODO: handle not simple cases.
274281
if (Arg.hasAttribute(Attribute::ByVal) ||
275282
Arg.hasAttribute(Attribute::InReg) ||
276-
Arg.hasAttribute(Attribute::StructRet) ||
277283
Arg.hasAttribute(Attribute::SwiftSelf) ||
278284
Arg.hasAttribute(Attribute::SwiftError) ||
279285
Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
280286
return false;
281287

288+
if (Arg.hasAttribute(Attribute::StructRet)) {
289+
assert(VRegs[Idx].size() == 1 &&
290+
"Unexpected amount of registers for sret argument.");
291+
FuncInfo->setSRetReturnReg(VRegs[Idx][0]);
292+
}
293+
282294
ArgInfo OrigArg(VRegs[Idx], Arg.getType(), Idx);
283295
setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
284296
splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());

llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
@a1_8bit = external global i8
66
@a7_8bit = external global i8
77
@a8_8bit = external global i8
8+
%struct.all = type { i8, i16, i32, i8, i16, i32, i64, float, double }
89

910
define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) {
1011
; X86-LABEL: name: test_i8_args_8
@@ -745,7 +746,7 @@ define <32 x float> @test_return_v32f32() {
745746
; X86-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
746747
; X86-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[LOAD]](p0) :: (store (<32 x s32>))
747748
; X86-NEXT: $eax = COPY [[LOAD]](p0)
748-
; X86-NEXT: RET 0
749+
; X86-NEXT: RET 0, $eax
749750
;
750751
; X64-LABEL: name: test_return_v32f32
751752
; X64: bb.1 (%ir-block.0):
@@ -756,7 +757,7 @@ define <32 x float> @test_return_v32f32() {
756757
; X64-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
757758
; X64-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[COPY]](p0) :: (store (<32 x s32>))
758759
; X64-NEXT: $rax = COPY [[COPY]](p0)
759-
; X64-NEXT: RET 0
760+
; X64-NEXT: RET 0, $rax
760761
ret <32 x float> zeroinitializer
761762
}
762763

@@ -793,3 +794,30 @@ define float @test_call_v32f32() {
793794
%elt = extractelement <32 x float> %vect, i32 7
794795
ret float %elt
795796
}
797+
798+
define void @test_sret(ptr sret(%struct.all) align 8 %result) #0 {
799+
; X86-LABEL: name: test_sret
800+
; X86: bb.1.entry:
801+
; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
802+
; X86-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
803+
; X86-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 104
804+
; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[LOAD]](p0)
805+
; X86-NEXT: G_STORE [[C]](s8), [[COPY]](p0) :: (store (s8) into %ir.c, align 8)
806+
; X86-NEXT: $eax = COPY [[LOAD]](p0)
807+
; X86-NEXT: RET 0, $eax
808+
;
809+
; X64-LABEL: name: test_sret
810+
; X64: bb.1.entry:
811+
; X64-NEXT: liveins: $rdi
812+
; X64-NEXT: {{ $}}
813+
; X64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
814+
; X64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 104
815+
; X64-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
816+
; X64-NEXT: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8) into %ir.c, align 8)
817+
; X64-NEXT: $rax = COPY [[COPY]](p0)
818+
; X64-NEXT: RET 0, $rax
819+
entry:
820+
%c = getelementptr inbounds %struct.all, ptr %result, i32 0, i32 0
821+
store i8 104, ptr %c, align 8
822+
ret void
823+
}

llvm/test/CodeGen/X86/isel-buildvector-sse.ll

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,23 @@ define <8 x i32> @test_vector_v8i32() {
2222
;
2323
; SSE-X64-GISEL-LABEL: test_vector_v8i32:
2424
; SSE-X64-GISEL: # %bb.0:
25-
; SSE-X64-GISEL-NEXT: movl $128100944, %eax # imm = 0x7A2AA50
26-
; SSE-X64-GISEL-NEXT: movl $-632258670, %ecx # imm = 0xDA507F92
27-
; SSE-X64-GISEL-NEXT: movl $-408980432, %edx # imm = 0xE79F7430
28-
; SSE-X64-GISEL-NEXT: movl $708630551, %esi # imm = 0x2A3CD817
25+
; SSE-X64-GISEL-NEXT: movq %rdi, %rax
26+
; SSE-X64-GISEL-NEXT: movl $128100944, %ecx # imm = 0x7A2AA50
27+
; SSE-X64-GISEL-NEXT: movl $-632258670, %edx # imm = 0xDA507F92
28+
; SSE-X64-GISEL-NEXT: movl $-408980432, %esi # imm = 0xE79F7430
29+
; SSE-X64-GISEL-NEXT: movl $708630551, %edi # imm = 0x2A3CD817
2930
; SSE-X64-GISEL-NEXT: movl $-871899055, %r8d # imm = 0xCC07E051
3031
; SSE-X64-GISEL-NEXT: movl $-633489957, %r9d # imm = 0xDA3DB5DB
3132
; SSE-X64-GISEL-NEXT: movl $591019567, %r10d # imm = 0x233A3E2F
3233
; SSE-X64-GISEL-NEXT: movl $708632899, %r11d # imm = 0x2A3CE143
33-
; SSE-X64-GISEL-NEXT: movl %eax, (%rdi)
34-
; SSE-X64-GISEL-NEXT: movl %ecx, 4(%rdi)
35-
; SSE-X64-GISEL-NEXT: movl %edx, 8(%rdi)
36-
; SSE-X64-GISEL-NEXT: movl %esi, 12(%rdi)
37-
; SSE-X64-GISEL-NEXT: movl %r8d, 16(%rdi)
38-
; SSE-X64-GISEL-NEXT: movl %r9d, 20(%rdi)
39-
; SSE-X64-GISEL-NEXT: movl %r10d, 24(%rdi)
40-
; SSE-X64-GISEL-NEXT: movl %r11d, 28(%rdi)
34+
; SSE-X64-GISEL-NEXT: movl %ecx, (%rax)
35+
; SSE-X64-GISEL-NEXT: movl %edx, 4(%rax)
36+
; SSE-X64-GISEL-NEXT: movl %esi, 8(%rax)
37+
; SSE-X64-GISEL-NEXT: movl %edi, 12(%rax)
38+
; SSE-X64-GISEL-NEXT: movl %r8d, 16(%rax)
39+
; SSE-X64-GISEL-NEXT: movl %r9d, 20(%rax)
40+
; SSE-X64-GISEL-NEXT: movl %r10d, 24(%rax)
41+
; SSE-X64-GISEL-NEXT: movl %r11d, 28(%rax)
4142
; SSE-X64-GISEL-NEXT: retq
4243
;
4344
; SSE-X86-LABEL: test_vector_v8i32:
@@ -88,6 +89,7 @@ define <4 x i32> @test_vector_v4i32() {
8889
;
8990
; SSE-X64-GISEL-LABEL: test_vector_v4i32:
9091
; SSE-X64-GISEL: # %bb.0:
92+
; SSE-X64-GISEL-NEXT: movq %rdi, %rax
9193
; SSE-X64-GISEL-NEXT: movaps {{.*#+}} xmm0 = [128100944,3662708626,3885986864,708630551]
9294
; SSE-X64-GISEL-NEXT: movaps %xmm0, (%rdi)
9395
; SSE-X64-GISEL-NEXT: retq

llvm/test/CodeGen/X86/isel-buildvector-sse2.ll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,21 @@ define <7 x i8> @test_vector_v7i8() {
1919
;
2020
; SSE2-GISEL-LABEL: test_vector_v7i8:
2121
; SSE2-GISEL: # %bb.0:
22-
; SSE2-GISEL-NEXT: movb $4, %al
23-
; SSE2-GISEL-NEXT: movb $8, %cl
24-
; SSE2-GISEL-NEXT: movb $15, %dl
25-
; SSE2-GISEL-NEXT: movb $16, %sil
22+
; SSE2-GISEL-NEXT: movq %rdi, %rax
23+
; SSE2-GISEL-NEXT: movb $4, %cl
24+
; SSE2-GISEL-NEXT: movb $8, %dl
25+
; SSE2-GISEL-NEXT: movb $15, %sil
26+
; SSE2-GISEL-NEXT: movb $16, %dil
2627
; SSE2-GISEL-NEXT: movb $23, %r8b
2728
; SSE2-GISEL-NEXT: movb $42, %r9b
2829
; SSE2-GISEL-NEXT: movb $63, %r10b
29-
; SSE2-GISEL-NEXT: movb %al, (%rdi)
30-
; SSE2-GISEL-NEXT: movb %cl, 1(%rdi)
31-
; SSE2-GISEL-NEXT: movb %dl, 2(%rdi)
32-
; SSE2-GISEL-NEXT: movb %sil, 3(%rdi)
33-
; SSE2-GISEL-NEXT: movb %r8b, 4(%rdi)
34-
; SSE2-GISEL-NEXT: movb %r9b, 5(%rdi)
35-
; SSE2-GISEL-NEXT: movb %r10b, 6(%rdi)
30+
; SSE2-GISEL-NEXT: movb %cl, (%rax)
31+
; SSE2-GISEL-NEXT: movb %dl, 1(%rax)
32+
; SSE2-GISEL-NEXT: movb %sil, 2(%rax)
33+
; SSE2-GISEL-NEXT: movb %dil, 3(%rax)
34+
; SSE2-GISEL-NEXT: movb %r8b, 4(%rax)
35+
; SSE2-GISEL-NEXT: movb %r9b, 5(%rax)
36+
; SSE2-GISEL-NEXT: movb %r10b, 6(%rax)
3637
; SSE2-GISEL-NEXT: retq
3738
ret <7 x i8> <i8 4, i8 8, i8 15, i8 16, i8 23, i8 42, i8 63>
3839
}

0 commit comments

Comments
 (0)