Skip to content

Commit b90cfe4

Browse files
authored
[AMDGPU] New ttracedata intrinsics (#70235)
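Add the intrinsics llvm.amdgcn.s.ttracedata and llvm.amdgcn.s.ttracedata.imm, which map directly to the corresponding instructions s_ttracedata and s_ttracedata_imm. These are inherently whole-wave operations, so a non-uniform (VGPR) input to llvm.amdgcn.s.ttracedata is made uniform with a readfirstlane; llvm.amdgcn.s.ttracedata.imm only takes an immediate.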
1 parent 3a223f4 commit b90cfe4

File tree

4 files changed

+77
-2
lines changed

4 files changed

+77
-2
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,6 +1697,13 @@ def int_amdgcn_s_setprio :
16971697
DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
16981698
IntrHasSideEffects]>;
16991699

1700+
// llvm.amdgcn.s.ttracedata: takes a single i32 and returns nothing. It is
// selected to s_ttracedata, with the argument supplied through M0.
// It is IntrNoMem but also IntrHasSideEffects; presumably the latter keeps a
// call with no result from being treated as dead -- confirm.
def int_amdgcn_s_ttracedata :
1701+
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
1702+
[IntrNoMem, IntrHasSideEffects]>;
1703+
// llvm.amdgcn.s.ttracedata.imm: takes a single i16 that must be a constant
// (ImmArg on argument 0) and returns nothing. It is selected to
// s_ttracedata_imm, with the constant used as the instruction's simm16.
def int_amdgcn_s_ttracedata_imm :
1704+
DefaultAttrsIntrinsic<[], [llvm_i16_ty],
1705+
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
1706+
17001707
// This is IntrHasSideEffects so it can be used to read cycle counters.
17011708
def int_amdgcn_s_getreg :
17021709
ClangBuiltin<"__builtin_amdgcn_s_getreg">,

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3066,6 +3066,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
30663066
constrainOpWithReadfirstlane(B, MI, 2);
30673067
return;
30683068
}
3069+
// s_ttracedata takes its data from M0, so operand 1 has to be uniform. If
// the operand was mapped to a VGPR, constrain it through a readfirstlane.
case Intrinsic::amdgcn_s_ttracedata:
3070+
constrainOpWithReadfirstlane(B, MI, 1); // M0
3071+
return;
30693072
case Intrinsic::amdgcn_raw_buffer_load_lds:
30703073
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
30713074
applyDefaultMapping(OpdMapper);
@@ -4670,6 +4673,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
46704673
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
46714674
break;
46724675
}
4676+
case Intrinsic::amdgcn_s_ttracedata: {
4677+
// This must be an SGPR, but accept a VGPR.
// Operand 1 is the 32-bit data value. Its current bank is kept as-is
// (SGPRRegBankID is the default passed to getRegBankID; presumably used when
// no bank is known yet -- confirm). The VGPR case is fixed up with a
// readfirstlane in applyMappingImpl.
4678+
unsigned Bank =
4679+
getRegBankID(MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID);
4680+
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
4681+
break;
4682+
}
46734683
case Intrinsic::amdgcn_end_cf: {
46744684
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
46754685
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1503,7 +1503,10 @@ def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm
15031503
def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16",
15041504
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
15051505
}
1506-
def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> {
1506+
1507+
// s_ttracedata has no explicit operands and an empty asm operand string; the
// data is an implicit use of M0 (Uses = [M0]). The pattern selects
// llvm.amdgcn.s.ttracedata with its argument placed in M0.
// simm16 is set to 0 with fixed_imm = 1; presumably this marks the immediate
// field as a fixed encoding rather than an operand -- confirm.
let Uses = [M0] in
1508+
def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins), "",
1509+
[(int_amdgcn_s_ttracedata M0)]> {
15071510
let simm16 = 0;
15081511
let fixed_imm = 1;
15091512
}
@@ -1547,8 +1550,10 @@ let SubtargetPredicate = isGFX10Plus in {
15471550
[(SIdenorm_mode (i32 timm:$simm16))]>;
15481551
}
15491552

1553+
// s_ttracedata_imm carries its data as a 16-bit immediate ($simm16). The
// pattern selects llvm.amdgcn.s.ttracedata.imm and forwards the constant
// argument (timm) as $simm16. hasSideEffects is set explicitly here. The
// definition sits inside the SubtargetPredicate = isGFX10Plus block.
let hasSideEffects = 1 in
15501554
def S_TTRACEDATA_IMM :
1551-
SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">;
1555+
SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16",
1556+
[(int_amdgcn_s_ttracedata_imm timm:$simm16)]>;
15521557
} // End SubtargetPredicate = isGFX10Plus
15531558

15541559
let SubtargetPredicate = isGFX11Plus in {
(New LLVM IR test file for the two intrinsics; its path was not captured on this page.)
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
3+
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
4+
5+
declare void @llvm.amdgcn.s.ttracedata(i32)
6+
declare void @llvm.amdgcn.s.ttracedata.imm(i16)
7+
8+
; Constant argument: the value 1000000 (0xf4240) is moved into m0 with
; s_mov_b32 before s_ttracedata. Both selectors share the GFX11 checks.
define amdgpu_cs void @ttracedata_c() {
9+
; GFX11-LABEL: ttracedata_c:
10+
; GFX11: ; %bb.0:
11+
; GFX11-NEXT: s_mov_b32 m0, 0xf4240
12+
; GFX11-NEXT: s_ttracedata
13+
; GFX11-NEXT: s_endpgm
14+
call void @llvm.amdgcn.s.ttracedata(i32 1000000)
15+
ret void
16+
}
17+
18+
; Uniform argument (inreg, arrives in s0): it is copied into m0 with
; s_mov_b32; no readfirstlane is expected.
define amdgpu_cs void @ttracedata_s(i32 inreg %val) {
19+
; GFX11-LABEL: ttracedata_s:
20+
; GFX11: ; %bb.0:
21+
; GFX11-NEXT: s_mov_b32 m0, s0
22+
; GFX11-NEXT: s_ttracedata
23+
; GFX11-NEXT: s_endpgm
24+
call void @llvm.amdgcn.s.ttracedata(i32 %val)
25+
ret void
26+
}
27+
28+
; Non-uniform argument (arrives in v0): both selectors must insert a
; v_readfirstlane_b32. SelectionDAG reads into s0 and then copies to m0, while
; GlobalISel reads straight into m0, hence the separate check prefixes.
define amdgpu_cs void @ttracedata_v(i32 %val) {
29+
; GFX11-SDAG-LABEL: ttracedata_v:
30+
; GFX11-SDAG: ; %bb.0:
31+
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
32+
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
33+
; GFX11-SDAG-NEXT: s_mov_b32 m0, s0
34+
; GFX11-SDAG-NEXT: s_ttracedata
35+
; GFX11-SDAG-NEXT: s_endpgm
36+
;
37+
; GFX11-GISEL-LABEL: ttracedata_v:
38+
; GFX11-GISEL: ; %bb.0:
39+
; GFX11-GISEL-NEXT: v_readfirstlane_b32 m0, v0
40+
; GFX11-GISEL-NEXT: s_ttracedata
41+
; GFX11-GISEL-NEXT: s_endpgm
42+
call void @llvm.amdgcn.s.ttracedata(i32 %val)
43+
ret void
44+
}
45+
46+
; Immediate form: the i16 constant 1000 is emitted directly as the
; instruction operand (0x3e8); m0 is not written.
define amdgpu_cs void @ttracedata_imm() {
47+
; GFX11-LABEL: ttracedata_imm:
48+
; GFX11: ; %bb.0:
49+
; GFX11-NEXT: s_ttracedata_imm 0x3e8
50+
; GFX11-NEXT: s_endpgm
51+
call void @llvm.amdgcn.s.ttracedata.imm(i16 1000)
52+
ret void
53+
}

0 commit comments

Comments
 (0)