Skip to content

[AMDGPU] Fix unwanted LICM/CSE of llvm.amdgcn.pops.exiting.wave.id #96190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2349,10 +2349,9 @@ class AMDGPUGlobalLoadLDS :
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;

// Use read/write of inaccessible memory to model the fact that this reads a
// volatile value.
// This is IntrHasSideEffects because it reads from a volatile hardware register.
def int_amdgcn_pops_exiting_wave_id :
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;

//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
Expand Down
12 changes: 0 additions & 12 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2549,15 +2549,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}

void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
// TODO: Select this with a tablegen pattern. This is tricky because the
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
// mayLoad/mayStore and tablegen complains about the mismatch.
SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
SDValue Chain = N->getOperand(0);
CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
}

static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
Expand Down Expand Up @@ -2714,9 +2705,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
return;
case Intrinsic::amdgcn_pops_exiting_wave_id:
SelectPOPSExitingWaveID(N);
return;
}

SelectCode(N);
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
void SelectPOPSExitingWaveID(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
Expand Down
17 changes: 0 additions & 17 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
MachineInstr &MI) const {
Register Dst = MI.getOperand(0).getReg();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();

// TODO: Select this with a tablegen pattern. This is tricky because the
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
// mayLoad/mayStore and tablegen complains about the mismatch.
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
.addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
Expand Down Expand Up @@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
case Intrinsic::amdgcn_pops_exiting_wave_id:
return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectSBarrier(MachineInstr &MI) const;
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
bool selectPOPSExitingWaveID(MachineInstr &MI) const;

bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,11 @@ let isMoveImm = 1 in {
} // End Uses = [SCC]
} // End isMoveImm = 1

// Variant of S_MOV_B32 used for reading from volatile registers like
// SRC_POPS_EXITING_WAVE_ID.
let hasSideEffects = 1 in
def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;

let Defs = [SCC] in {
def S_NOT_B32 : SOP1_32 <"s_not_b32",
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
Expand Down Expand Up @@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
def : GetFPModePat<fpmode_mask_gfx6plus>;
}

let SubtargetPredicate = isGFX9GFX10 in
def : GCNPat<
(int_amdgcn_pops_exiting_wave_id),
(S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
>;

//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
Expand Down
22 changes: 18 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,29 +36,29 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
define amdgpu_ps void @test_loop() {
; SDAG-LABEL: test_loop:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: .LBB1_1: ; %loop
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: s_cmp_eq_u32 s0, 0
; SDAG-NEXT: s_cbranch_scc1 .LBB1_1
; SDAG-NEXT: ; %bb.2: ; %exit
; SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test_loop:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: .LBB1_1: ; %loop
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9-GISEL-NEXT: ; %bb.2: ; %exit
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: test_loop:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: .LBB1_1: ; %loop
; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX10-GISEL-NEXT: ; %bb.2: ; %exit
Expand All @@ -77,21 +77,35 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_bitcmp0_b32 s0, 0
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: s_cbranch_scc1 .LBB2_2
; SDAG-NEXT: ; %bb.1: ; %body
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: .LBB2_2: ; %exit
; SDAG-NEXT: ; return to shader part epilog
;
; GFX9-GISEL-LABEL: test_if:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_mov_b32 s1, s0
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_xor_b32 s1, s1, 1
; GFX9-GISEL-NEXT: s_and_b32 s1, s1, 1
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
; GFX9-GISEL-NEXT: ; %bb.1: ; %body
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: .LBB2_2: ; %exit
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: test_if:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
; GFX10-GISEL-NEXT: s_and_b32 s1, s0, 1
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
; GFX10-GISEL-NEXT: ; %bb.1: ; %body
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: .LBB2_2: ; %exit
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
%id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()
Expand Down
Loading