Skip to content

Commit 45d2d77

Browse files
authored
[AMDGPU] New llvm.amdgcn.wave.id intrinsic (#79325)
This is only valid on targets with architected SGPRs.
1 parent 122ed55 commit 45d2d77

File tree

6 files changed

+100
-0
lines changed

6 files changed

+100
-0
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2777,6 +2777,10 @@ class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
27772777

27782778
def int_amdgcn_global_load_tr : AMDGPULoadIntrinsic<global_ptr_ty>;
27792779

2780+
// i32 @llvm.amdgcn.wave.id()
// Returns the wave's ID within its group (waveIDinGroup) as an i32. Takes no
// arguments. Only valid on targets with architected SGPRs.
2781+
def int_amdgcn_wave_id :
2782+
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
2783+
27802784
//===----------------------------------------------------------------------===//
27812785
// Deep learning intrinsics.
27822786
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6883,6 +6883,23 @@ bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
68836883
return true;
68846884
}
68856885

6886+
/// Legalize the llvm.amdgcn.wave.id intrinsic by extracting the wave ID field
/// from TTMP8.
///
/// \param MI the intrinsic instruction; operand 0 is its destination register.
/// \param B  builder used to emit the replacement instructions.
/// \returns true after \p MI has been replaced by an unsigned bitfield extract
/// of TTMP8 and erased, or false (with \p MI left in place) when the subtarget
/// has no architected SGPRs.
bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI,
6887+
MachineIRBuilder &B) const {
6888+
// With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
6889+
if (!ST.hasArchitectedSGPRs())
6890+
return false;
6891+
LLT S32 = LLT::scalar(32);
6892+
Register DstReg = MI.getOperand(0).getReg();
6893+
Register TTMP8 =
6894+
getFunctionLiveInPhysReg(B.getMF(), B.getTII(), AMDGPU::TTMP8,
6895+
AMDGPU::SReg_32RegClass, B.getDebugLoc(), S32);
6896+
// Field TTMP8[29:25]: it starts at bit 25 and is 5 bits wide.
auto LSB = B.buildConstant(S32, 25);
6897+
auto Width = B.buildConstant(S32, 5);
6898+
B.buildUbfx(DstReg, TTMP8, LSB, Width);
6899+
// The extract above now defines DstReg, so the intrinsic can be removed.
MI.eraseFromParent();
6900+
return true;
6901+
}
6902+
68866903
bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
68876904
MachineInstr &MI) const {
68886905
MachineIRBuilder &B = Helper.MIRBuilder;
@@ -7005,6 +7022,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
70057022
case Intrinsic::amdgcn_workgroup_id_z:
70067023
return legalizePreloadedArgIntrin(MI, MRI, B,
70077024
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
7025+
case Intrinsic::amdgcn_wave_id:
7026+
return legalizeWaveID(MI, B);
70087027
case Intrinsic::amdgcn_lds_kernel_id:
70097028
return legalizePreloadedArgIntrin(MI, MRI, B,
70107029
AMDGPUFunctionArgInfo::LDS_KERNEL_ID);

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
212212

213213
bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
214214
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
215+
bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
215216

216217
bool legalizeImageIntrinsic(
217218
MachineInstr &MI, MachineIRBuilder &B,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7921,6 +7921,18 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
79217921
return Loads[0];
79227922
}
79237923

7924+
/// Lower the llvm.amdgcn.wave.id intrinsic to a bitfield extract of TTMP8.
///
/// \param DAG the selection DAG in which the replacement nodes are created.
/// \param Op  the intrinsic node being lowered; used here only to construct
///            the SDLoc for the new nodes.
/// \returns an AMDGPUISD::BFE_U32 node extracting 5 bits starting at bit 25 of
/// TTMP8, or an empty SDValue when the subtarget has no architected SGPRs.
SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
7925+
// With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
7926+
if (!Subtarget->hasArchitectedSGPRs())
7927+
return {};
7928+
SDLoc SL(Op);
7929+
MVT VT = MVT::i32;
7930+
SDValue TTMP8 = CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
7931+
AMDGPU::TTMP8, VT, SL);
7932+
// Operands after TTMP8 are the field offset (25) and the field width (5).
return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8,
7933+
DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
7934+
}
7935+
79247936
SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
79257937
unsigned Dim,
79267938
const ArgDescriptor &Arg) const {
@@ -8091,6 +8103,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
80918103
case Intrinsic::amdgcn_workgroup_id_z:
80928104
return getPreloadedValue(DAG, *MFI, VT,
80938105
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
8106+
case Intrinsic::amdgcn_wave_id:
8107+
return lowerWaveID(DAG, Op);
80948108
case Intrinsic::amdgcn_lds_kernel_id: {
80958109
if (MFI->isEntryFunction())
80968110
return getLDSKernelId(DAG, DL);

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
8080
SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
8181
unsigned NewOpcode) const;
8282

83+
SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
8384
SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
8485
const ArgDescriptor &ArgDesc) const;
8586

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GFX9-SDAG-ERR %s
3+
; RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GFX9-GISEL-ERR %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
5+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
6+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
7+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
8+
9+
; GFX9-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.wave.id
10+
; GFX9-GISEL-ERR: LLVM ERROR: unable to legalize instruction: {{.*}} = G_INTRINSIC intrinsic(@llvm.amdgcn.wave.id)
11+
12+
; amdgpu_cs calling convention: call llvm.amdgcn.wave.id and store the result
; to %out. The checks below expect a single s_bfe_u32 on ttmp8.
define amdgpu_cs void @test_wave_id(ptr addrspace(1) %out) {
13+
; GFX9-LABEL: test_wave_id:
14+
; GFX9: ; %bb.0:
15+
; GFX9-NEXT: s_bfe_u32 s0, ttmp8, 0x50019
16+
; GFX9-NEXT: v_mov_b32_e32 v2, s0
17+
; GFX9-NEXT: global_store_dword v[0:1], v2, off
18+
; GFX9-NEXT: s_endpgm
19+
;
20+
; GFX12-LABEL: test_wave_id:
21+
; GFX12: ; %bb.0:
22+
; GFX12-NEXT: s_bfe_u32 s0, ttmp8, 0x50019
23+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
24+
; GFX12-NEXT: v_mov_b32_e32 v2, s0
25+
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
26+
; GFX12-NEXT: s_nop 0
27+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
28+
; GFX12-NEXT: s_endpgm
29+
%waveid = call i32 @llvm.amdgcn.wave.id()
30+
store i32 %waveid, ptr addrspace(1) %out
31+
ret void
32+
}
33+
34+
; amdgpu_gfx calling convention: same body as test_wave_id, but the function
; returns with s_setpc_b64 instead of ending the program.
define amdgpu_gfx void @test_wave_id_callable(ptr addrspace(1) %out) {
35+
; GFX9-LABEL: test_wave_id_callable:
36+
; GFX9: ; %bb.0:
37+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38+
; GFX9-NEXT: s_bfe_u32 s34, ttmp8, 0x50019
39+
; GFX9-NEXT: v_mov_b32_e32 v2, s34
40+
; GFX9-NEXT: global_store_dword v[0:1], v2, off
41+
; GFX9-NEXT: s_waitcnt vmcnt(0)
42+
; GFX9-NEXT: s_setpc_b64 s[30:31]
43+
;
44+
; GFX12-LABEL: test_wave_id_callable:
45+
; GFX12: ; %bb.0:
46+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
47+
; GFX12-NEXT: s_wait_expcnt 0x0
48+
; GFX12-NEXT: s_wait_samplecnt 0x0
49+
; GFX12-NEXT: s_wait_bvhcnt 0x0
50+
; GFX12-NEXT: s_wait_kmcnt 0x0
51+
; GFX12-NEXT: s_bfe_u32 s0, ttmp8, 0x50019
52+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
53+
; GFX12-NEXT: v_mov_b32_e32 v2, s0
54+
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
55+
; GFX12-NEXT: s_setpc_b64 s[30:31]
56+
%waveid = call i32 @llvm.amdgcn.wave.id()
57+
store i32 %waveid, ptr addrspace(1) %out
58+
ret void
59+
}
60+
61+
declare i32 @llvm.amdgcn.wave.id()

0 commit comments

Comments
 (0)