Skip to content

Commit 657fb44

Browse files
authored
[AMDGPU] Add target hook to isGlobalMemoryObject (#112781)
We want special handling for IGLP instructions in the scheduler but they should still be treated like they have side effects by other passes. Add a target hook to the ScheduleDAGInstrs DAG builder so that we have more control over this.
1 parent 9a9e41c commit 657fb44

10 files changed

+85
-46
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ class TargetInstrInfo : public MCInstrInfo {
136136
const TargetRegisterInfo *TRI,
137137
const MachineFunction &MF) const;
138138

139+
/// Returns true if MI is an instruction we are unable to reason about
140+
/// (like a call or something with unmodeled side effects).
141+
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const;
142+
139143
/// Return true if the instruction is trivially rematerializable, meaning it
140144
/// has no side effects and requires no operands that aren't always available.
141145
/// This means the only allowed uses are constants and unallocatable physical

llvm/lib/CodeGen/ScheduleDAGInstrs.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/CodeGen/ScheduleDAG.h"
3636
#include "llvm/CodeGen/ScheduleDFS.h"
3737
#include "llvm/CodeGen/SlotIndexes.h"
38+
#include "llvm/CodeGen/TargetInstrInfo.h"
3839
#include "llvm/CodeGen/TargetRegisterInfo.h"
3940
#include "llvm/CodeGen/TargetSubtargetInfo.h"
4041
#include "llvm/Config/llvm-config.h"
@@ -547,12 +548,6 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
547548
}
548549
}
549550

550-
/// Returns true if MI is an instruction we are unable to reason about
551-
/// (like a call or something with unmodeled side effects).
552-
static inline bool isGlobalMemoryObject(MachineInstr *MI) {
553-
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
554-
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
555-
}
556551

557552
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
558553
unsigned Latency) {
@@ -899,8 +894,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
899894
// isLoadFromStackSLot are not usable after stack slots are lowered to
900895
// actual addresses).
901896

897+
const TargetInstrInfo *TII = ST.getInstrInfo();
902898
// This is a barrier event that acts as a pivotal node in the DAG.
903-
if (isGlobalMemoryObject(&MI)) {
899+
if (TII->isGlobalMemoryObject(&MI)) {
904900

905901
// Become the barrier chain.
906902
if (BarrierChain)

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,3 +1917,8 @@ bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
19171917
}
19181918
return true;
19191919
}
1920+
1921+
bool TargetInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
1922+
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
1923+
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
1924+
}

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -239,23 +239,6 @@ class SchedGroup {
239239
}
240240
};
241241

242-
// Remove all existing edges from a SCHED_BARRIER or SCHED_GROUP_BARRIER.
243-
static void resetEdges(SUnit &SU, ScheduleDAGInstrs *DAG) {
244-
assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER ||
245-
SU.getInstr()->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
246-
SU.getInstr()->getOpcode() == AMDGPU::IGLP_OPT);
247-
248-
while (!SU.Preds.empty())
249-
for (auto &P : SU.Preds)
250-
SU.removePred(P);
251-
252-
while (!SU.Succs.empty())
253-
for (auto &S : SU.Succs)
254-
for (auto &SP : S.getSUnit()->Preds)
255-
if (SP.getSUnit() == &SU)
256-
S.getSUnit()->removePred(SP);
257-
}
258-
259242
using SUToCandSGsPair = std::pair<SUnit *, SmallVector<int, 4>>;
260243
using SUsToCandSGsVec = SmallVector<SUToCandSGsPair, 4>;
261244

@@ -459,7 +442,6 @@ void PipelineSolver::makePipeline() {
459442
// Command line requested IGroupLP doesn't have SGBarr
460443
if (!SGBarr)
461444
continue;
462-
resetEdges(*SGBarr, DAG);
463445
SG.link(*SGBarr, false);
464446
}
465447
}
@@ -2611,7 +2593,6 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
26112593
initSchedGroupBarrierPipelineStage(R);
26122594
FoundSB = true;
26132595
} else if (Opc == AMDGPU::IGLP_OPT) {
2614-
resetEdges(*R, DAG);
26152596
if (!FoundSB && !FoundIGLP) {
26162597
FoundIGLP = true;
26172598
ShouldApplyIGLP = initIGLPOpt(*R);
@@ -2633,7 +2614,6 @@ void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
26332614
assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
26342615
// Remove all existing edges from the SCHED_BARRIER that were added due to the
26352616
// instruction having side effects.
2636-
resetEdges(SchedBarrier, DAG);
26372617
LLVM_DEBUG(dbgs() << "Building SchedGroup for SchedBarrier with Mask: "
26382618
<< MI.getOperand(0).getImm() << "\n");
26392619
auto InvertedMask =
@@ -2691,7 +2671,6 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
26912671
std::vector<SUnit>::reverse_iterator RIter) {
26922672
// Remove all existing edges from the SCHED_GROUP_BARRIER that were added due
26932673
// to the instruction having side effects.
2694-
resetEdges(*RIter, DAG);
26952674
MachineInstr &SGB = *RIter->getInstr();
26962675
assert(SGB.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER);
26972676
int32_t SGMask = SGB.getOperand(0).getImm();

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,12 @@ static void getRegisterPressures(
188188
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
189189
}
190190

191+
// Return true if the instruction is mutually exclusive with all non-IGLP DAG
192+
// mutations, requiring all other mutations to be disabled.
193+
static bool isIGLPMutationOnly(unsigned Opcode) {
194+
return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
195+
}
196+
191197
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
192198
bool AtTop,
193199
const RegPressureTracker &RPTracker,
@@ -1152,8 +1158,7 @@ bool GCNSchedStage::initGCNRegion() {
11521158
StageID == GCNSchedStageID::ILPInitialSchedule) {
11531159
for (auto &I : DAG) {
11541160
Unsched.push_back(&I);
1155-
if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
1156-
I.getOpcode() == AMDGPU::IGLP_OPT)
1161+
if (isIGLPMutationOnly(I.getOpcode()))
11571162
DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
11581163
}
11591164
} else {
@@ -1894,8 +1899,7 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
18941899

18951900
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
18961901
return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
1897-
unsigned Opc = MI->getOpcode();
1898-
return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1902+
return isIGLPMutationOnly(MI->getOpcode());
18991903
});
19001904
}
19011905

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10051,3 +10051,10 @@ void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI,
1005110051
Op.setSubReg(AMDGPU::sub0);
1005210052
MI.addOperand(MachineOperand::CreateReg(NewVR, false, true));
1005310053
}
10054+
10055+
bool SIInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
10056+
if (isIGLP(*MI))
10057+
return false;
10058+
10059+
return TargetInstrInfo::isGlobalMemoryObject(MI);
10060+
}

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
242242
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
243243
int64_t &Offset1) const override;
244244

245+
bool isGlobalMemoryObject(const MachineInstr *MI) const override;
246+
245247
bool getMemOperandsWithOffsetWidth(
246248
const MachineInstr &LdSt,
247249
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
@@ -968,6 +970,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
968970
return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
969971
}
970972

973+
bool isIGLP(unsigned Opcode) const {
974+
return Opcode == AMDGPU::SCHED_BARRIER ||
975+
Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
976+
}
977+
978+
bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
979+
971980
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
972981
switch (Opcode) {
973982
case AMDGPU::S_WAITCNT_soft:

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@
2525
; GCN-NEXT: ; implicit-def: $vgpr79
2626
; GCN-NEXT: ; implicit-def: $vgpr80
2727
; GCN-NEXT: ; implicit-def: $vgpr91
28-
; GCN-NEXT: ;;#ASMSTART
29-
; GCN-NEXT: s_waitcnt vmcnt(8)
30-
; GCN-NEXT: ;;#ASMEND
3128
; GCN-NEXT: ; kill: killed $sgpr16_sgpr17_sgpr18_sgpr19
3229
; GCN-NEXT: ; iglp_opt mask(0x00000002)
3330
; GCN-NEXT: s_nop 1
@@ -477,6 +474,9 @@
477474
; GCN-NEXT: s_waitcnt lgkmcnt(0)
478475
; GCN-NEXT: buffer_inv sc0 sc1
479476
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[4:5], v[8:9], v[32:47]
477+
; GCN-NEXT: ;;#ASMSTART
478+
; GCN-NEXT: s_waitcnt vmcnt(8)
479+
; GCN-NEXT: ;;#ASMEND
480480
; GCN-NEXT: v_mov_b32_e32 v4, 0
481481
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[6:7], v[0:1], v[32:47]
482482
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[2:3], v[0:1], v[48:63]

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,41 @@ entry:
285285
ret void
286286
}
287287

288+
define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
289+
; GCN-LABEL: test_iglp_opt_asm_sideeffect:
290+
; GCN: ; %bb.0: ; %entry
291+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
292+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
293+
; GCN-NEXT: v_and_b32_e32 v0, 0xffc, v0
294+
; GCN-NEXT: ; iglp_opt mask(0x00000000)
295+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
296+
; GCN-NEXT: v_add_u32_e32 v1, s0, v0
297+
; GCN-NEXT: ds_read_b32 v1, v1
298+
; GCN-NEXT: v_add_u32_e32 v0, s1, v0
299+
; GCN-NEXT: v_mov_b32_e32 v2, s0
300+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
301+
; GCN-NEXT: ds_write_b32 v0, v1
302+
; GCN-NEXT: ;;#ASMSTART
303+
; GCN-NEXT: ;;#ASMEND
304+
; GCN-NEXT: ds_read_b32 v0, v2 offset:256
305+
; GCN-NEXT: v_mov_b32_e32 v1, s1
306+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
307+
; GCN-NEXT: ds_write_b32 v1, v0 offset:256
308+
; GCN-NEXT: s_endpgm
309+
entry:
310+
%idx = call i32 @llvm.amdgcn.workitem.id.x()
311+
%load.0.addr = getelementptr float, ptr addrspace(3) %in, i32 %idx
312+
%load.0 = load float, ptr addrspace(3) %load.0.addr
313+
%store.0.addr = getelementptr float, ptr addrspace(3) %out, i32 %idx
314+
store float %load.0, ptr addrspace(3) %store.0.addr
315+
call void asm sideeffect "", ""() #1
316+
call void @llvm.amdgcn.iglp.opt(i32 0) #1
317+
%load.1.addr = getelementptr float, ptr addrspace(3) %in, i32 64
318+
%load.1 = load float, ptr addrspace(3) %load.1.addr
319+
%store.1.addr = getelementptr float, ptr addrspace(3) %out, i32 64
320+
store float %load.1, ptr addrspace(3) %store.1.addr
321+
ret void
322+
}
288323

289324
declare void @llvm.amdgcn.iglp.opt(i32) #1
290325
declare i32 @llvm.amdgcn.workitem.id.x() #1

0 commit comments

Comments
 (0)