Skip to content

Commit 22c572e

Browse files
committed
[X86][CodeGen] Support memory folding for NDD -> RMW
1 parent 8eb0945 commit 22c572e

File tree

3 files changed

+271
-6
lines changed

3 files changed

+271
-6
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3222,13 +3222,22 @@ int X86::getCCMPCondFlagsFromCondCode(X86::CondCode CC) {
32223222
}
32233223

32243224
#define GET_X86_NF_TRANSFORM_TABLE
3225+
#define GET_X86_ND2NONND_TABLE
32253226
#include "X86GenInstrMapping.inc"
32263227
/// Look up \p Opc in X86NFTransformTable and return the NewOpc of the entry
/// whose OldOpc equals \p Opc, or 0 when the table holds no such entry.
unsigned X86::getNFVariant(unsigned Opc) {
32273228
ArrayRef<X86TableEntry> Table = ArrayRef(X86NFTransformTable);
32283229
// lower_bound only finds the first entry not ordered before Opc, so the
// result may be end() or a different opcode; both cases are rejected below.
// NOTE(review): this presumes the generated table is sorted by OldOpc --
// confirm against the emitter.
const auto I = llvm::lower_bound(Table, Opc);
32293230
return (I == Table.end() || I->OldOpc != Opc) ? 0U : I->NewOpc;
32303231
}
32313232

3233+
/// Look up \p Opc in X86ND2NonNDTable and return the NewOpc of the entry whose
/// OldOpc equals \p Opc.
///
/// Returns 0 when \p STI reports no NDD support, or when the table holds no
/// entry for \p Opc; callers treat 0 as "no mapping".
static unsigned getNonNDVariant(unsigned Opc, const X86Subtarget &STI) {
3234+
// Without NDD the lookup is skipped entirely.
if (!STI.hasNDD())
3235+
return 0U;
3236+
ArrayRef<X86TableEntry> Table = ArrayRef(X86ND2NonNDTable);
3237+
// lower_bound may land on end() or on a different opcode, hence the
// explicit OldOpc comparison in the return below.
// NOTE(review): this presumes the generated table is sorted by OldOpc --
// confirm against the emitter.
const auto I = llvm::lower_bound(Table, Opc);
3238+
return (I == Table.end() || I->OldOpc != Opc) ? 0U : I->NewOpc;
3239+
}
3240+
32323241
/// Return the inverse of the specified condition,
32333242
/// e.g. turning COND_E to COND_NE.
32343243
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
@@ -7380,8 +7389,12 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
73807389
// Folding a memory location into the two-address part of a two-address
73817390
// instruction is different than folding it other places. It requires
73827391
// replacing the *two* registers with the memory location.
7392+
//
7393+
// Utilize the mapping NonNDD -> RMW for the NDD variant.
7394+
unsigned NonNDOpc = getNonNDVariant(Opc, Subtarget);
73837395
const X86FoldTableEntry *I =
7384-
IsTwoAddr ? lookupTwoAddrFoldTable(Opc) : lookupFoldTable(Opc, OpNum);
7396+
IsTwoAddr ? lookupTwoAddrFoldTable(NonNDOpc ? NonNDOpc : Opc)
7397+
: lookupFoldTable(Opc, OpNum);
73857398

73867399
MachineInstr *NewMI = nullptr;
73877400
if (I) {
@@ -7482,12 +7495,20 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
74827495
if (!RI.hasStackRealignment(MF))
74837496
Alignment =
74847497
std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign());
7498+
7499+
auto Impl = [&]() {
7500+
return foldMemoryOperandImpl(MF, MI, Ops[0],
7501+
MachineOperand::CreateFI(FrameIndex), InsertPt,
7502+
Size, Alignment, /*AllowCommute=*/true);
7503+
};
74857504
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
74867505
unsigned NewOpc = 0;
74877506
unsigned RCSize = 0;
7488-
switch (MI.getOpcode()) {
7507+
unsigned Opc = MI.getOpcode();
7508+
switch (Opc) {
74897509
default:
7490-
return nullptr;
7510+
// NDD can be folded into RMW though its Op0 and Op1 are not tied.
7511+
return getNonNDVariant(Opc, Subtarget) ? Impl() : nullptr;
74917512
case X86::TEST8rr:
74927513
NewOpc = X86::CMP8ri;
74937514
RCSize = 1;
@@ -7515,9 +7536,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
75157536
} else if (Ops.size() != 1)
75167537
return nullptr;
75177538

7518-
return foldMemoryOperandImpl(MF, MI, Ops[0],
7519-
MachineOperand::CreateFI(FrameIndex), InsertPt,
7520-
Size, Alignment, /*AllowCommute=*/true);
7539+
return Impl();
75217540
}
75227541

75237542
/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# RUN: llc %s -o - -start-before=greedy -stop-after=virtregrewriter -mtriple x86_64 -mattr=+ndd | FileCheck %s
2+
#
3+
# This test is for stack spill folding -- the ADD32ri_ND near the end of the MIR
4+
# below should be morphed into an ADD32mi by the register allocator, making it
5+
# load-operate-store to %stack.2.
6+
#
7+
# CHECK: ADD32mi %stack.2
8+
--- |
9+
10+
define fastcc void @add32ri_nd_2_add32mi(i1 %arg, i1 %arg1, i1 %arg2, ptr %arg3, ptr %arg4, i1 %arg5, i8 %arg6) #0 {
11+
bb:
12+
br label %bb7
13+
14+
bb7: ; preds = %bb21, %bb
15+
br label %bb8
16+
17+
bb8: ; preds = %bb21, %bb7
18+
%lsr.iv = phi i32 [ %lsr.iv.next, %bb21 ], [ 0, %bb7 ]
19+
br label %bb11
20+
21+
bb9: ; preds = %bb14
22+
%trunc = trunc i64 0 to i32
23+
br label %bb10
24+
25+
bb10: ; preds = %bb14, %bb9
26+
br label %bb15
27+
28+
bb11: ; preds = %bb13, %bb8
29+
store double 0.000000e+00, ptr %arg3, align 8
30+
store i8 0, ptr %arg4, align 1
31+
br i1 %arg, label %bb13, label %bb12
32+
33+
bb12: ; preds = %bb11
34+
%call = tail call i32 (ptr, ptr, ...) null(ptr null, ptr null, i32 0, i32 0, i32 0, double 0.000000e+00, i32 0)
35+
br label %bb13
36+
37+
bb13: ; preds = %bb12, %bb11
38+
br i1 %arg, label %bb14, label %bb11
39+
40+
bb14: ; preds = %bb13
41+
br i1 %arg1, label %bb9, label %bb10
42+
43+
bb15: ; preds = %bb15, %bb10
44+
%lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb15 ], [ %lsr.iv, %bb10 ]
45+
store i8 %arg6, ptr null, align 1
46+
%or = or i32 %lsr.iv1, 1
47+
%lsr.iv.next2 = add i32 %lsr.iv1, 1
48+
br i1 %arg5, label %bb21, label %bb15
49+
50+
bb21: ; preds = %bb15
51+
%lsr.iv.next = add i32 %lsr.iv, 8
52+
br i1 %arg2, label %bb8, label %bb7
53+
}
54+
55+
attributes #0 = { "target-features"="+ndd" }
56+
57+
...
58+
---
59+
name: add32ri_nd_2_add32mi
60+
alignment: 16
61+
exposesReturnsTwice: false
62+
legalized: false
63+
regBankSelected: false
64+
selected: false
65+
failedISel: false
66+
tracksRegLiveness: true
67+
hasWinCFI: false
68+
callsEHReturn: false
69+
callsUnwindInit: false
70+
hasEHCatchret: false
71+
hasEHScopes: false
72+
hasEHFunclets: false
73+
isOutlined: false
74+
debugInstrRef: true
75+
failsVerification: false
76+
tracksDebugUserValues: false
77+
registers:
78+
- { id: 0, class: gr32, preferred-register: '' }
79+
- { id: 1, class: gr32, preferred-register: '' }
80+
- { id: 2, class: gr32, preferred-register: '' }
81+
- { id: 3, class: gr32, preferred-register: '' }
82+
- { id: 4, class: gr32, preferred-register: '' }
83+
- { id: 5, class: gr32, preferred-register: '' }
84+
- { id: 6, class: gr32, preferred-register: '' }
85+
- { id: 7, class: gr64, preferred-register: '' }
86+
- { id: 8, class: gr64, preferred-register: '' }
87+
- { id: 9, class: gr32, preferred-register: '' }
88+
- { id: 10, class: gr8, preferred-register: '' }
89+
- { id: 11, class: gr8, preferred-register: '' }
90+
- { id: 12, class: gr8, preferred-register: '' }
91+
- { id: 13, class: gr8, preferred-register: '' }
92+
- { id: 14, class: gr8, preferred-register: '' }
93+
- { id: 15, class: gr32, preferred-register: '' }
94+
- { id: 16, class: gr32, preferred-register: '' }
95+
- { id: 17, class: gr64_with_sub_8bit, preferred-register: '' }
96+
- { id: 18, class: fr64, preferred-register: '' }
97+
- { id: 19, class: gr8, preferred-register: '' }
98+
- { id: 20, class: gr32, preferred-register: '' }
99+
- { id: 21, class: gr32, preferred-register: '' }
100+
- { id: 22, class: gr32, preferred-register: '' }
101+
liveins:
102+
- { reg: '$edi', virtual-reg: '%4' }
103+
- { reg: '$esi', virtual-reg: '%5' }
104+
- { reg: '$edx', virtual-reg: '%6' }
105+
- { reg: '$rcx', virtual-reg: '%7' }
106+
- { reg: '$r8', virtual-reg: '%8' }
107+
- { reg: '$r9d', virtual-reg: '%9' }
108+
frameInfo:
109+
isFrameAddressTaken: false
110+
isReturnAddressTaken: false
111+
hasStackMap: false
112+
hasPatchPoint: false
113+
stackSize: 0
114+
offsetAdjustment: 0
115+
maxAlignment: 8
116+
adjustsStack: true
117+
hasCalls: true
118+
stackProtector: ''
119+
functionContext: ''
120+
maxCallFrameSize: 4294967295
121+
cvBytesOfCalleeSavedRegisters: 0
122+
hasOpaqueSPAdjustment: false
123+
hasVAStart: false
124+
hasMustTailInVarArgFunc: false
125+
hasTailCall: false
126+
isCalleeSavedInfoValid: false
127+
localFrameSize: 0
128+
savePoint: ''
129+
restorePoint: ''
130+
fixedStack:
131+
- { id: 0, type: default, offset: 0, size: 1, alignment: 16, stack-id: default,
132+
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
133+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
134+
stack: []
135+
entry_values: []
136+
callSites: []
137+
debugValueSubstitutions: []
138+
constants: []
139+
machineFunctionInfo: {}
140+
body: |
141+
bb.0.bb:
142+
successors: %bb.1(0x80000000)
143+
liveins: $edi, $esi, $edx, $rcx, $r8, $r9d
144+
145+
%9:gr32 = COPY $r9d
146+
%8:gr64 = COPY $r8
147+
%7:gr64 = COPY $rcx
148+
%6:gr32 = COPY $edx
149+
%5:gr32 = COPY $esi
150+
%4:gr32 = COPY $edi
151+
%14:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s8) from %fixed-stack.0, align 16)
152+
undef %17.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
153+
154+
bb.1.bb7:
155+
successors: %bb.2(0x80000000)
156+
157+
%21:gr32 = MOV32r0 implicit-def dead $eflags
158+
159+
bb.2.bb8:
160+
successors: %bb.4(0x80000000)
161+
162+
JMP_1 %bb.4
163+
164+
bb.3.bb9:
165+
successors: %bb.8(0x80000000)
166+
167+
%22:gr32 = COPY %21
168+
JMP_1 %bb.8
169+
170+
bb.4.bb11:
171+
successors: %bb.6(0x40000000), %bb.5(0x40000000)
172+
173+
MOV64mi32 %7, 1, $noreg, 0, $noreg, 0 :: (store (s64) into %ir.arg3)
174+
MOV8mi %8, 1, $noreg, 0, $noreg, 0 :: (store (s8) into %ir.arg4)
175+
TEST8ri %4.sub_8bit, 1, implicit-def $eflags
176+
JCC_1 %bb.6, 5, implicit $eflags
177+
JMP_1 %bb.5
178+
179+
bb.5.bb12:
180+
successors: %bb.6(0x80000000)
181+
182+
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
183+
$xmm0 = FsFLD0SD
184+
dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
185+
dead $esi = MOV32r0 implicit-def dead $eflags, implicit-def $rsi
186+
$edx = MOV32r0 implicit-def dead $eflags
187+
$ecx = MOV32r0 implicit-def dead $eflags
188+
$r8d = MOV32r0 implicit-def dead $eflags
189+
$r9d = MOV32r0 implicit-def dead $eflags
190+
$al = MOV8ri 1
191+
CALL64r %17, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $edx, implicit $ecx, implicit $r8d, implicit killed $xmm0, implicit $r9d, implicit killed $al, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax
192+
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
193+
194+
bb.6.bb13:
195+
successors: %bb.7(0x04000000), %bb.4(0x7c000000)
196+
197+
TEST8ri %4.sub_8bit, 1, implicit-def $eflags
198+
JCC_1 %bb.4, 4, implicit $eflags
199+
JMP_1 %bb.7
200+
201+
bb.7.bb14:
202+
successors: %bb.3(0x40000000), %bb.8(0x40000000)
203+
204+
TEST8ri %5.sub_8bit, 1, implicit-def $eflags
205+
%22:gr32 = COPY %21
206+
JCC_1 %bb.3, 5, implicit $eflags
207+
208+
bb.8.bb15:
209+
successors: %bb.9(0x04000000), %bb.8(0x7c000000)
210+
211+
MOV8mr $noreg, 1, $noreg, 0, $noreg, %14 :: (store (s8) into `ptr null`)
212+
%22:gr32 = INC32r_ND %22, implicit-def dead $eflags
213+
TEST8ri %9.sub_8bit, 1, implicit-def $eflags
214+
JCC_1 %bb.8, 4, implicit $eflags
215+
JMP_1 %bb.9
216+
217+
bb.9.bb21:
218+
successors: %bb.2(0x7c000000), %bb.1(0x04000000)
219+
220+
%21:gr32 = ADD32ri_ND %21, 8, implicit-def dead $eflags
221+
TEST8ri %6.sub_8bit, 1, implicit-def $eflags
222+
JCC_1 %bb.2, 5, implicit $eflags
223+
JMP_1 %bb.1
224+
225+
...

llvm/utils/TableGen/X86InstrMappingEmitter.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ class X86InstrMappingEmitter {
6363
raw_ostream &OS);
6464
void emitNFTransformTable(ArrayRef<const CodeGenInstruction *> Insts,
6565
raw_ostream &OS);
66+
void emitND2NonNDTable(ArrayRef<const CodeGenInstruction *> Insts,
67+
raw_ostream &OS);
6668

6769
// Prints the definition of class X86TableEntry.
6870
void printClassDef(raw_ostream &OS);
@@ -297,6 +299,24 @@ void X86InstrMappingEmitter::emitNFTransformTable(
297299
printTable(Table, "X86NFTransformTable", "GET_X86_NF_TRANSFORM_TABLE", OS);
298300
}
299301

302+
/// Emit the table named "X86ND2NonNDTable", tagged "GET_X86_ND2NONND_TABLE".
///
/// Each entry pairs an instruction whose record name ends in "_ND" with the
/// instruction whose record name is the same minus that suffix.
///
/// \param Insts instructions to scan for "_ND" candidates.
/// \param OS    stream the table is printed to.
void X86InstrMappingEmitter::emitND2NonNDTable(
302+
ArrayRef<const CodeGenInstruction *> Insts, raw_ostream &OS) {
303+
std::vector<Entry> Table;
304+
for (const CodeGenInstruction *Inst : Insts) {
305+
const Record *Rec = Inst->TheDef;
306+
StringRef Name = Rec->getName();
307+
// Only records accepted by isInteresting() and named "*_ND" are candidates.
if (!isInteresting(Rec) || !Name.ends_with("_ND"))
308+
continue;
309+
// Strip the 3-character "_ND" suffix to get the counterpart's record name.
auto *NewRec = Records.getDef(Name.drop_back(3));
310+
// No record under the stripped name: nothing to map to.
if (!NewRec)
311+
continue;
312+
auto &NewInst = Target.getInstruction(NewRec);
313+
// Keep the pair only when the counterpart's first operand is a register.
// NOTE(review): presumably this filters out counterparts whose first
// operand is a memory operand -- confirm the intent.
if (isRegisterOperand(NewInst.Operands[0].Rec))
314+
Table.push_back(std::pair(Inst, &NewInst));
315+
}
316+
printTable(Table, "X86ND2NonNDTable", "GET_X86_ND2NONND_TABLE", OS);
317+
}
319+
300320
void X86InstrMappingEmitter::run(raw_ostream &OS) {
301321
emitSourceFileHeader("X86 instruction mapping", OS);
302322

@@ -305,6 +325,7 @@ void X86InstrMappingEmitter::run(raw_ostream &OS) {
305325
printClassDef(OS);
306326
emitCompressEVEXTable(Insts, OS);
307327
emitNFTransformTable(Insts, OS);
328+
emitND2NonNDTable(Insts, OS);
308329
}
309330
} // namespace
310331

0 commit comments

Comments
 (0)