Skip to content

[AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id #89612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2482,6 +2482,11 @@ class AMDGPUGlobalLoadLDS :
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;

// Use read/write of inaccessible memory to model the fact that this reads a
// volatile value.
def int_amdgcn_pops_exiting_wave_id :
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;

//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2526,6 +2526,14 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}

void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
// TODO: Select this with a tablegen pattern. This is tricky because the
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
// mayLoad/mayStore and tablegen complains about the mismatch.
SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), Reg);
}

static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
Expand Down Expand Up @@ -2682,6 +2690,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
return;
case Intrinsic::amdgcn_pops_exiting_wave_id:
SelectPOPSExitingWaveID(N);
return;
}

SelectCode(N);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
void SelectPOPSExitingWaveID(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2079,6 +2079,21 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
MachineInstr &MI) const {
Register Dst = MI.getOperand(0).getReg();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();

// TODO: Select this with a tablegen pattern. This is tricky because the
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
// mayLoad/mayStore and tablegen complains about the mismatch.
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
.addDef(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
Expand Down Expand Up @@ -2129,6 +2144,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
case Intrinsic::amdgcn_pops_exiting_wave_id:
return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectSBarrier(MachineInstr &MI) const;
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
bool selectPOPSExitingWaveID(MachineInstr &MI) const;

bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5132,6 +5132,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_pops_exiting_wave_id:
return getDefaultMappingSOP(MI);
default:
return getInvalidInstructionMapping();
}
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL

define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
; SDAG-LABEL: test:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO in a future patch: fold src_pops_exiting_wave_id into uses of s2. This might be tricky because the value of src_pops_exiting_wave_id is volatile.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. As long as there's only a single use it may be fine.

Copy link

@Triang3l Triang3l Apr 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another possible, but disgustingly dirty approach would be to expose it not as a load, but rather directly as an integer addition to an operand 🙃 But that'd definitely add some excessive responsibilities to it, especially since there are other ways the wave ID wraparound can be handled mathematically such as comparisons, and I don't know how unfriendly to the rest of the architecture that would be.

; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v1, s2
; SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: test:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
store i32 %id, ptr addrspace(1) %ptr
ret void
}
Loading