Skip to content

Commit 3e6da32

Browse files
authored
[AMDGPU] Add GFX12 s_sleep_var instruction and intrinsic (#75499)
1 parent 1b6c828 commit 3e6da32

File tree

7 files changed

+81
-0
lines changed

7 files changed

+81
-0
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,6 +1715,12 @@ def int_amdgcn_s_sleep :
17151715
IntrHasSideEffects]> {
17161716
}
17171717

1718+
// Intrinsic behind __builtin_amdgcn_s_sleep_var: takes one i32 operand and
// returns nothing. The operand carries no ImmArg attribute, so it is not
// restricted to an immediate. IntrHasSideEffects is set alongside IntrNoMem.
def int_amdgcn_s_sleep_var
1719+
: ClangBuiltin<"__builtin_amdgcn_s_sleep_var">,
1720+
Intrinsic<[], [llvm_i32_ty],
1721+
[IntrNoMem, IntrHasSideEffects, IntrWillReturn]> {
1722+
}
1723+
17181724
def int_amdgcn_s_nop :
17191725
DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
17201726
IntrHasSideEffects]> {

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3101,6 +3101,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
31013101
applyDefaultMapping(OpdMapper);
31023102
constrainOpWithReadfirstlane(B, MI, 8); // M0
31033103
return;
3104+
case Intrinsic::amdgcn_s_sleep_var:
3105+
assert(OpdMapper.getVRegs(1).empty());
3106+
constrainOpWithReadfirstlane(B, MI, 1);
3107+
return;
31043108
case Intrinsic::amdgcn_s_barrier_signal_var:
31053109
case Intrinsic::amdgcn_s_barrier_join:
31063110
case Intrinsic::amdgcn_s_wakeup_barrier:
@@ -4849,6 +4853,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
48494853
getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); // %data1
48504854
break;
48514855
}
4856+
case Intrinsic::amdgcn_s_sleep_var:
4857+
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
4858+
break;
48524859
case Intrinsic::amdgcn_s_barrier_signal_var:
48534860
case Intrinsic::amdgcn_s_barrier_join:
48544861
case Intrinsic::amdgcn_s_wakeup_barrier:

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6564,6 +6564,19 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
65646564
}
65656565
}
65666566

6567+
// Legalize s_sleep_var.
6568+
if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6569+
const DebugLoc &DL = MI.getDebugLoc();
6570+
Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6571+
int Src0Idx =
6572+
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
6573+
MachineOperand &Src0 = MI.getOperand(Src0Idx);
6574+
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
6575+
.add(Src0);
6576+
Src0.ChangeToRegister(Reg, false);
6577+
return nullptr;
6578+
}
6579+
65676580
// Legalize MUBUF instructions.
65686581
bool isSoffsetLegal = true;
65696582
int SoffsetIdx =

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,6 +1602,10 @@ def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
16021602
"$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
16031603
}
16041604

1605+
// s_sleep_var instruction definition. The selection pattern maps
// int_amdgcn_s_sleep_var, with its operand matched as SSrc_b32:$src0, onto
// this instruction.
def S_SLEEP_VAR : SOP1_0_32 <"s_sleep_var", [(int_amdgcn_s_sleep_var SSrc_b32:$src0)]> {
1606+
// The instruction defines no result, so it is explicitly flagged as having
// side effects (presumably to keep it from being treated as dead; confirm).
let hasSideEffects = 1;
1607+
}
1608+
16051609
def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16",
16061610
[(int_amdgcn_s_setprio timm:$simm16)]> {
16071611
}
@@ -1997,6 +2001,7 @@ defm S_GET_BARRIER_STATE_IMM : SOP1_Real_gfx12<0x050>;
19972001
defm S_BARRIER_INIT_IMM : SOP1_Real_gfx12<0x051>;
19982002
defm S_BARRIER_JOIN_IMM : SOP1_Real_gfx12<0x052>;
19992003
defm S_WAKEUP_BARRIER_IMM : SOP1_Real_gfx12<0x057>;
2004+
defm S_SLEEP_VAR : SOP1_Real_gfx12<0x058>;
20002005

20012006
//===----------------------------------------------------------------------===//
20022007
// SOP1 - GFX1150, GFX12
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=0 < %s | FileCheck -check-prefixes=GCN %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN %s
4+
5+
declare void @llvm.amdgcn.s.sleep.var(i32)
6+
7+
; Non-kernel function with a variable operand: the expected code reads %arg
; from v0 and copies it into s0 with v_readfirstlane_b32 before s_sleep_var.
define void @test_s_sleep_var1(i32 %arg) {
8+
; GCN-LABEL: test_s_sleep_var1:
9+
; GCN: ; %bb.0:
10+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; GCN-NEXT: v_readfirstlane_b32 s0, v0
12+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
13+
; GCN-NEXT: s_sleep_var s0
14+
; GCN-NEXT: s_setpc_b64 s[30:31]
15+
call void @llvm.amdgcn.s.sleep.var(i32 %arg)
16+
ret void
17+
}
18+
19+
; Constant operand: the expected code passes the literal 10 directly to
; s_sleep_var, with no register copy.
define void @test_s_sleep_var2() {
20+
; GCN-LABEL: test_s_sleep_var2:
21+
; GCN: ; %bb.0:
22+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23+
; GCN-NEXT: s_sleep_var 10
24+
; GCN-NEXT: s_setpc_b64 s[30:31]
25+
call void @llvm.amdgcn.s.sleep.var(i32 10)
26+
ret void
27+
}
28+
29+
; Kernel entry point: the expected code loads %arg into s0 with s_load_b32 and
; uses it in s_sleep_var directly, without a v_readfirstlane_b32.
define amdgpu_kernel void @test_s_sleep_var3(i32 %arg) {
30+
; GCN-LABEL: test_s_sleep_var3:
31+
; GCN: ; %bb.0:
32+
; GCN-NEXT: s_load_b32 s0, s[0:1], 0x24
33+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
34+
; GCN-NEXT: s_sleep_var s0
35+
; GCN-NEXT: s_endpgm
36+
call void @llvm.amdgcn.s.sleep.var(i32 %arg)
37+
ret void
38+
}

llvm/test/MC/AMDGPU/gfx12_asm_sop1.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
22

3+
s_sleep_var 0x1234
4+
// GFX12: encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
5+
6+
s_sleep_var s1
7+
// GFX12: encoding: [0x01,0x58,0x80,0xbe]
8+
39
s_cvt_f32_i32 s5, s1
410
// GFX12: encoding: [0x01,0x64,0x85,0xbe]
511

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
22

3+
# GFX12: s_sleep_var 0x1234 ; encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
4+
0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00
5+
6+
# GFX12: s_sleep_var s1 ; encoding: [0x01,0x58,0x80,0xbe]
7+
0x01,0x58,0x80,0xbe
8+
39
# GFX12: s_cvt_f32_i32 s5, s1 ; encoding: [0x01,0x64,0x85,0xbe]
410
0x01,0x64,0x85,0xbe
511

0 commit comments

Comments
 (0)