-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Fix unwanted LICM/CSE of llvm.amdgcn.pops.exiting.wave.id #96190
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Mark both the intrinsic and the selected MachineInstr as having side effects to prevent MachineLICM and MachineCSE from moving/removing them.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-ir Author: Jay Foad (jayfoad) ChangesMark both the intrinsic and the selected MachineInstr as having side Full diff: https://github.com/llvm/llvm-project/pull/96190.diff 7 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8a5566ae12020..059449a8d7d2c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2349,10 +2349,9 @@ class AMDGPUGlobalLoadLDS :
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
-// Use read/write of inaccessible memory to model the fact that this reads a
-// volatile value.
+// This is IntrHasSideEffects because it reads from a volatile hardware register.
def int_amdgcn_pops_exiting_wave_id :
- DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5b616d8cc77d1..7ab76b7ffaa55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2549,15 +2549,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
-void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
- // TODO: Select this with a tablegen pattern. This is tricky because the
- // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
- // mayLoad/mayStore and tablegen complains about the mismatch.
- SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
- SDValue Chain = N->getOperand(0);
- CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
-}
-
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
@@ -2714,9 +2705,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
return;
- case Intrinsic::amdgcn_pops_exiting_wave_id:
- SelectPOPSExitingWaveID(N);
- return;
}
SelectCode(N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index ae6fdb594a2ef..6d370a8b7782b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -263,7 +263,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
- void SelectPOPSExitingWaveID(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 03e2d622dd319..16a2987e7b463 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
-bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
- MachineInstr &MI) const {
- Register Dst = MI.getOperand(0).getReg();
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock *MBB = MI.getParent();
-
- // TODO: Select this with a tablegen pattern. This is tricky because the
- // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
- // mayLoad/mayStore and tablegen complains about the mismatch.
- auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
- .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
- MI.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
-}
-
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
- case Intrinsic::amdgcn_pops_exiting_wave_id:
- return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 48f3b18118014..f561d5d29efc4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -125,7 +125,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectSBarrier(MachineInstr &MI) const;
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
- bool selectPOPSExitingWaveID(MachineInstr &MI) const;
bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e1253d3ed0500..64f33199545a1 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -215,6 +215,11 @@ let isMoveImm = 1 in {
} // End Uses = [SCC]
} // End isMoveImm = 1
+// Variant of S_MOV_B32 used for reading from volatile registers like
+// SRC_POPS_EXITING_WAVE_ID.
+let hasSideEffects = 1 in
+def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;
+
let Defs = [SCC] in {
def S_NOT_B32 : SOP1_32 <"s_not_b32",
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
def : GetFPModePat<fpmode_mask_gfx6plus>;
}
+let SubtargetPredicate = isGFX9GFX10 in
+def : GCNPat<
+ (int_amdgcn_pops_exiting_wave_id),
+ (S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
+>;
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
index aaa4e2b3622df..f3c5ac757e22b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -36,9 +36,9 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
define amdgpu_ps void @test_loop() {
; SDAG-LABEL: test_loop:
; SDAG: ; %bb.0:
-; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: .LBB1_1: ; %loop
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: s_cmp_eq_u32 s0, 0
; SDAG-NEXT: s_cbranch_scc1 .LBB1_1
; SDAG-NEXT: ; %bb.2: ; %exit
@@ -46,9 +46,9 @@ define amdgpu_ps void @test_loop() {
;
; GFX9-GISEL-LABEL: test_loop:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: .LBB1_1: ; %loop
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9-GISEL-NEXT: ; %bb.2: ; %exit
@@ -56,9 +56,9 @@ define amdgpu_ps void @test_loop() {
;
; GFX10-GISEL-LABEL: test_loop:
; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: .LBB1_1: ; %loop
; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX10-GISEL-NEXT: ; %bb.2: ; %exit
@@ -77,14 +77,23 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_bitcmp0_b32 s0, 0
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; SDAG-NEXT: s_cbranch_scc1 .LBB2_2
+; SDAG-NEXT: ; %bb.1: ; %body
+; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; SDAG-NEXT: .LBB2_2: ; %exit
; SDAG-NEXT: ; return to shader part epilog
;
; GFX9-GISEL-LABEL: test_if:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_mov_b32 s1, s0
-; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_xor_b32 s1, s1, 1
; GFX9-GISEL-NEXT: s_and_b32 s1, s1, 1
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
+; GFX9-GISEL-NEXT: ; %bb.1: ; %body
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT: .LBB2_2: ; %exit
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: test_if:
@@ -92,6 +101,11 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
; GFX10-GISEL-NEXT: s_and_b32 s1, s0, 1
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %body
+; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT: .LBB2_2: ; %exit
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
%id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
My only concern would be that there are things that need to be made aware of S_MOV_B32_sideeffects
-- but after a quick search I could not see anything.
…lvm#96190) Mark both the intrinsic and the selected MachineInstr as having side effects to prevent MachineLICM and MachineCSE from moving/removing them.
…lvm#96190) Mark both the intrinsic and the selected MachineInstr as having side effects to prevent MachineLICM and MachineCSE from moving/removing them.
Mark both the intrinsic and the selected MachineInstr as having side
effects to prevent MachineLICM and MachineCSE from moving/removing them.