Skip to content

Commit b053359

Browse files
[X86InstrInfo] support memfold on spillable inline asm (#70832)
This enables -regalloc=greedy to memfold spillable inline asm MachineOperands. Because no instruction selection framework marks MachineOperands as spillable, no language frontend can observe functional changes from this patch. That will change once instruction selection frameworks are updated. Link: #20571
1 parent 0ccef6a commit b053359

File tree

5 files changed

+267
-14
lines changed

5 files changed

+267
-14
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2194,8 +2194,8 @@ class TargetInstrInfo : public MCInstrInfo {
21942194
/// finalize-isel. Example:
21952195
/// INLINEASM ... 262190 /* mem:m */, %stack.0.x.addr, 1, $noreg, 0, $noreg
21962196
/// we would add placeholders for: ^ ^ ^ ^
2197-
virtual void
2198-
getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const {
2197+
virtual void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
2198+
int FI) const {
21992199
llvm_unreachable("unknown number of operands necessary");
22002200
}
22012201

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -578,28 +578,37 @@ static void foldInlineAsmMemOperand(MachineInstr *MI, unsigned OpNo, int FI,
578578
foldInlineAsmMemOperand(MI, TiedTo, FI, TII);
579579
}
580580

581-
// Change the operand from a register to a frame index.
582-
MO.ChangeToFrameIndex(FI, MO.getTargetFlags());
583-
584-
SmallVector<MachineOperand, 4> NewOps;
585-
TII.getFrameIndexOperands(NewOps);
581+
SmallVector<MachineOperand, 5> NewOps;
582+
TII.getFrameIndexOperands(NewOps, FI);
586583
assert(!NewOps.empty() && "getFrameIndexOperands didn't create any operands");
587-
MI->insert(MI->operands_begin() + OpNo + 1, NewOps);
584+
MI->removeOperand(OpNo);
585+
MI->insert(MI->operands_begin() + OpNo, NewOps);
588586

589587
// Change the previous operand to a MemKind InlineAsm::Flag. The second param
590588
// is the per-target number of operands that represent the memory operand
591589
// excluding this one (MD). This includes MO.
592-
InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size() + 1);
590+
InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size());
593591
F.setMemConstraint(InlineAsm::ConstraintCode::m);
594592
MachineOperand &MD = MI->getOperand(OpNo - 1);
595593
MD.setImm(F);
596594

597-
// Update mayload/maystore metadata.
595+
// Update mayload/maystore metadata, and memoperands.
596+
MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
598597
MachineOperand &ExtraMO = MI->getOperand(InlineAsm::MIOp_ExtraInfo);
599-
if (RI.Reads)
598+
if (RI.Reads) {
600599
ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayLoad);
601-
if (RI.Writes)
600+
Flags |= MachineMemOperand::MOLoad;
601+
}
602+
if (RI.Writes) {
602603
ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayStore);
604+
Flags |= MachineMemOperand::MOStore;
605+
}
606+
MachineFunction *MF = MI->getMF();
607+
const MachineFrameInfo &MFI = MF->getFrameInfo();
608+
MachineMemOperand *MMO = MF->getMachineMemOperand(
609+
MachinePointerInfo::getFixedStack(*MF, FI), Flags, MFI.getObjectSize(FI),
610+
MFI.getObjectAlign(FI));
611+
MI->addMemOperand(*MF, MMO);
603612
}
604613

605614
// Returns nullptr if not possible to fold.
@@ -671,7 +680,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
671680
if (NewMI)
672681
MBB->insert(MI, NewMI);
673682
} else if (MI.isInlineAsm()) {
674-
NewMI = foldInlineAsmMemOperand(MI, Ops, FI, *this);
683+
return foldInlineAsmMemOperand(MI, Ops, FI, *this);
675684
} else {
676685
// Ask the target to do the actual folding.
677686
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
@@ -744,7 +753,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
744753
if (NewMI)
745754
NewMI = &*MBB.insert(MI, NewMI);
746755
} else if (MI.isInlineAsm() && isLoadFromStackSlot(LoadMI, FrameIndex)) {
747-
NewMI = foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this);
756+
return foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this);
748757
} else {
749758
// Ask the target to do the actual folding.
750759
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10273,5 +10273,14 @@ void X86InstrInfo::genAlternativeCodeSequence(
1027310273
}
1027410274
}
1027510275

10276+
// See also: X86DAGToDAGISel::SelectInlineAsmMemoryOperand().
10277+
void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
10278+
int FI) const {
10279+
X86AddressMode M;
10280+
M.BaseType = X86AddressMode::FrameIndexBase;
10281+
M.Base.FrameIndex = FI;
10282+
M.getFullAddress(Ops);
10283+
}
10284+
1027610285
#define GET_INSTRINFO_HELPERS
1027710286
#include "X86GenInstrInfo.inc"

llvm/lib/Target/X86/X86InstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,9 @@ class X86InstrInfo final : public X86GenInstrInfo {
659659
return false;
660660
}
661661

662+
void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
663+
int FI) const override;
664+
662665
private:
663666
/// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
664667
/// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -start-after=finalize-isel -regalloc=greedy -stop-after=greedy \
3+
# RUN: -verify-machineinstrs -verify-regalloc %s -o - | FileCheck %s
4+
--- |
5+
target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
6+
target triple = "i386-unknown-linux-gnu"
7+
8+
define void @input(i32 %0) #0 {
9+
call void asm "# $0", "rm,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(i32 %0)
10+
ret void
11+
}
12+
13+
define i32 @output() #0 {
14+
%1 = alloca i32, align 4
15+
call void asm "# $0", "=*rm,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(ptr nonnull elementtype(i32) %1)
16+
%2 = load i32, ptr %1, align 4
17+
ret i32 %2
18+
}
19+
20+
define i32 @inout(i32 %0) #0 {
21+
%2 = alloca i32, align 4
22+
store i32 %0, ptr %2, align 4
23+
call void asm "# $0 $1", "=*rm,0,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(ptr nonnull elementtype(i32) %2, i32 %0)
24+
%3 = load i32, ptr %2, align 4
25+
ret i32 %3
26+
}
27+
28+
attributes #0 = { nounwind }
29+
30+
...
31+
---
32+
name: input
33+
alignment: 16
34+
exposesReturnsTwice: false
35+
legalized: false
36+
regBankSelected: false
37+
selected: false
38+
failedISel: false
39+
tracksRegLiveness: true
40+
hasWinCFI: false
41+
callsEHReturn: false
42+
callsUnwindInit: false
43+
hasEHCatchret: false
44+
hasEHScopes: false
45+
hasEHFunclets: false
46+
isOutlined: false
47+
debugInstrRef: false
48+
failsVerification: false
49+
tracksDebugUserValues: false
50+
registers:
51+
- { id: 0, class: gr32, preferred-register: '' }
52+
liveins: []
53+
frameInfo:
54+
isFrameAddressTaken: false
55+
isReturnAddressTaken: false
56+
hasStackMap: false
57+
hasPatchPoint: false
58+
stackSize: 0
59+
offsetAdjustment: 0
60+
maxAlignment: 4
61+
adjustsStack: false
62+
hasCalls: false
63+
stackProtector: ''
64+
functionContext: ''
65+
maxCallFrameSize: 4294967295
66+
cvBytesOfCalleeSavedRegisters: 0
67+
hasOpaqueSPAdjustment: false
68+
hasVAStart: false
69+
hasMustTailInVarArgFunc: false
70+
hasTailCall: false
71+
localFrameSize: 0
72+
savePoint: ''
73+
restorePoint: ''
74+
fixedStack:
75+
- { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
76+
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
77+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
78+
stack: []
79+
entry_values: []
80+
callSites: []
81+
debugValueSubstitutions: []
82+
constants: []
83+
machineFunctionInfo: {}
84+
body: |
85+
bb.0 (%ir-block.1):
86+
; CHECK-LABEL: name: input
87+
; CHECK: INLINEASM &"# $0", 8 /* mayload attdialect */, 262190 /* mem:m */, %fixed-stack.0, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (load (s32) from %fixed-stack.0, align 16)
88+
; CHECK-NEXT: RET 0
89+
%0:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
90+
INLINEASM &"# $0", 0 /* attdialect */, 1076101129 /* reguse:GR32 spillable */, %0, 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
91+
RET 0
92+
93+
...
94+
---
95+
name: output
96+
alignment: 16
97+
exposesReturnsTwice: false
98+
legalized: false
99+
regBankSelected: false
100+
selected: false
101+
failedISel: false
102+
tracksRegLiveness: true
103+
hasWinCFI: false
104+
callsEHReturn: false
105+
callsUnwindInit: false
106+
hasEHCatchret: false
107+
hasEHScopes: false
108+
hasEHFunclets: false
109+
isOutlined: false
110+
debugInstrRef: false
111+
failsVerification: false
112+
tracksDebugUserValues: false
113+
registers:
114+
- { id: 0, class: gr32, preferred-register: '' }
115+
liveins: []
116+
frameInfo:
117+
isFrameAddressTaken: false
118+
isReturnAddressTaken: false
119+
hasStackMap: false
120+
hasPatchPoint: false
121+
stackSize: 0
122+
offsetAdjustment: 0
123+
maxAlignment: 4
124+
adjustsStack: false
125+
hasCalls: false
126+
stackProtector: ''
127+
functionContext: ''
128+
maxCallFrameSize: 4294967295
129+
cvBytesOfCalleeSavedRegisters: 0
130+
hasOpaqueSPAdjustment: false
131+
hasVAStart: false
132+
hasMustTailInVarArgFunc: false
133+
hasTailCall: false
134+
localFrameSize: 0
135+
savePoint: ''
136+
restorePoint: ''
137+
fixedStack: []
138+
stack:
139+
- { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
140+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
141+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
142+
entry_values: []
143+
callSites: []
144+
debugValueSubstitutions: []
145+
constants: []
146+
machineFunctionInfo: {}
147+
body: |
148+
bb.0 (%ir-block.0):
149+
; CHECK-LABEL: name: output
150+
; CHECK: INLINEASM &"# $0", 16 /* maystore attdialect */, 262190 /* mem:m */, %stack.1, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (store (s32) into %stack.1)
151+
; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
152+
; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %ir.1)
153+
; CHECK-NEXT: $eax = COPY [[MOV32rm]]
154+
; CHECK-NEXT: RET 0, $eax
155+
INLINEASM &"# $0", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
156+
MOV32mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.1)
157+
$eax = COPY %0
158+
RET 0, $eax
159+
160+
...
161+
---
162+
name: inout
163+
alignment: 16
164+
exposesReturnsTwice: false
165+
legalized: false
166+
regBankSelected: false
167+
selected: false
168+
failedISel: false
169+
tracksRegLiveness: true
170+
hasWinCFI: false
171+
callsEHReturn: false
172+
callsUnwindInit: false
173+
hasEHCatchret: false
174+
hasEHScopes: false
175+
hasEHFunclets: false
176+
isOutlined: false
177+
debugInstrRef: false
178+
failsVerification: false
179+
tracksDebugUserValues: false
180+
registers:
181+
- { id: 0, class: gr32, preferred-register: '' }
182+
- { id: 1, class: gr32, preferred-register: '' }
183+
liveins: []
184+
frameInfo:
185+
isFrameAddressTaken: false
186+
isReturnAddressTaken: false
187+
hasStackMap: false
188+
hasPatchPoint: false
189+
stackSize: 0
190+
offsetAdjustment: 0
191+
maxAlignment: 4
192+
adjustsStack: false
193+
hasCalls: false
194+
stackProtector: ''
195+
functionContext: ''
196+
maxCallFrameSize: 4294967295
197+
cvBytesOfCalleeSavedRegisters: 0
198+
hasOpaqueSPAdjustment: false
199+
hasVAStart: false
200+
hasMustTailInVarArgFunc: false
201+
hasTailCall: false
202+
localFrameSize: 0
203+
savePoint: ''
204+
restorePoint: ''
205+
fixedStack:
206+
- { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
207+
isImmutable: false, isAliased: false, callee-saved-register: '',
208+
callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
209+
debug-info-location: '' }
210+
stack: []
211+
entry_values: []
212+
callSites: []
213+
debugValueSubstitutions: []
214+
constants: []
215+
machineFunctionInfo: {}
216+
body: |
217+
bb.0 (%ir-block.1):
218+
; CHECK-LABEL: name: inout
219+
; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
220+
; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %stack.0)
221+
; CHECK-NEXT: INLINEASM &"# $0 $1", 24 /* mayload maystore attdialect */, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (load store (s32) on %stack.0)
222+
; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
223+
; CHECK-NEXT: MOV32mr %fixed-stack.0, 1, $noreg, 0, $noreg, [[MOV32rm1]] :: (store (s32) into %ir.2, align 16)
224+
; CHECK-NEXT: $eax = COPY [[MOV32rm1]]
225+
; CHECK-NEXT: RET 0, $eax
226+
%1:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
227+
INLINEASM &"# $0 $1", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 2147483657 /* reguse tiedto:$0 */, %1(tied-def 3), 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
228+
MOV32mr %fixed-stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.2, align 16)
229+
$eax = COPY %0
230+
RET 0, $eax
231+
232+
...

0 commit comments

Comments
 (0)