Skip to content

Commit 8aedad0

Browse files
committed
[AMDGPU] Add functions for composing and decomposing S_WAITCNT_DEPCTR operands
Add functions AMDGPU::DepCtr::encodeField*() and AMDGPU::DepCtr::decodeField*() for each of vm_vsrc, va_vdst and sa_sdst. These are now used in AMDGPUInsertDelayAlu and GCNHazardRecognizer so as to make working with S_WAITCNT_DEPCTR operands easier and more readable. Differential Revision: https://reviews.llvm.org/D154424
1 parent d732965 commit 8aedad0

File tree

4 files changed

+94
-12
lines changed

4 files changed

+94
-12
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
5151
MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64)
5252
return true;
5353
if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
54-
(MI.getOperand(0).getImm() & 0xf000) == 0)
54+
AMDGPU::DepCtr::decodeFieldVaVdst(MI.getOperand(0).getImm()) == 0)
5555
return true;
5656
return false;
5757
}

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,7 +1170,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
11701170
(MI.getOpcode() == AMDGPU::S_WAITCNT &&
11711171
!MI.getOperand(0).getImm()) ||
11721172
(MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1173-
MI.getOperand(0).getImm() == 0xffe3);
1173+
AMDGPU::DepCtr::decodeFieldVmVsrc(MI.getOperand(0).getImm()) == 0);
11741174
};
11751175

11761176
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
@@ -1180,7 +1180,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
11801180
const SIInstrInfo *TII = ST.getInstrInfo();
11811181
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
11821182
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
1183-
.addImm(0xffe3);
1183+
.addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
11841184
return true;
11851185
}
11861186

@@ -1293,7 +1293,8 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
12931293
return true;
12941294
}
12951295
if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1296-
(MI.getOperand(0).getImm() & 0xfffe) == 0xfffe)
1296+
AMDGPU::DepCtr::encodeFieldSaSdst(MI.getOperand(0).getImm(), 0) ==
1297+
0xfffe)
12971298
return true;
12981299
return false;
12991300
};
@@ -1304,7 +1305,7 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
13041305

13051306
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
13061307
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
1307-
.addImm(0xfffe);
1308+
.addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
13081309
return true;
13091310
}
13101311

@@ -1452,7 +1453,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
14521453
return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
14531454
(I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
14541455
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1455-
I.getOperand(0).getImm() == 0xffe3);
1456+
AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0);
14561457
};
14571458

14581459
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
@@ -1461,7 +1462,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
14611462

14621463
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
14631464
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
1464-
.addImm(0xffe3);
1465+
.addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
14651466

14661467
return true;
14671468
}
@@ -1523,7 +1524,7 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
15231524
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
15241525
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
15251526
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1526-
I.getOperand(0).getImm() == 0x0fff))
1527+
AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
15271528
return HazardExpired;
15281529

15291530
// Track registers writes
@@ -1685,10 +1686,10 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
16851686
return false;
16861687

16871688
// Hazard is observed - insert a wait on va_vdst counter to ensure hazard is
1688-
// avoided (mask 0x0fff achieves this).
1689+
// avoided.
16891690
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
16901691
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
1691-
.addImm(0x0fff);
1692+
.addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
16921693

16931694
return true;
16941695
}
@@ -2779,7 +2780,7 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
27792780
auto IsExpiredFn = [&MRI, this](const MachineInstr &I, int) {
27802781
// s_waitcnt_depctr sa_sdst(0) mitigates hazard.
27812782
if (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
2782-
!(I.getOperand(0).getImm() & 0x1))
2783+
AMDGPU::DepCtr::decodeFieldSaSdst(I.getOperand(0).getImm()) == 0)
27832784
return true;
27842785

27852786
// VALU access to any SGPR or literal constant other than HazardReg
@@ -2829,7 +2830,7 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
28292830
// Add s_waitcnt_depctr sa_sdst(0) after SALU write.
28302831
BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
28312832
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
2832-
.addImm(0xfffe);
2833+
.addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
28332834

28342835
// SALU write may be s_getpc in a bundle.
28352836
if (MI->getOpcode() == AMDGPU::S_GETPC_B64) {

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,24 @@ unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
9595
return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
9696
}
9797

98+
/// \returns VmVsrc bit width
99+
inline unsigned getVmVsrcBitWidth() {
  // Width, in bits, of the vm_vsrc field of the depctr immediate.
  constexpr unsigned Width = 3;
  return Width;
}
100+
101+
/// \returns VmVsrc bit shift
102+
inline unsigned getVmVsrcBitShift() {
  // Bit shift of the vm_vsrc field within the depctr immediate.
  constexpr unsigned Shift = 2;
  return Shift;
}
103+
104+
/// \returns VaVdst bit width
105+
inline unsigned getVaVdstBitWidth() {
  // Width, in bits, of the va_vdst field of the depctr immediate.
  constexpr unsigned Width = 4;
  return Width;
}
106+
107+
/// \returns VaVdst bit shift
108+
inline unsigned getVaVdstBitShift() {
  // Bit shift of the va_vdst field within the depctr immediate.
  constexpr unsigned Shift = 12;
  return Shift;
}
109+
110+
/// \returns SaSdst bit width
111+
inline unsigned getSaSdstBitWidth() {
  // Width, in bits, of the sa_sdst field of the depctr immediate.
  constexpr unsigned Width = 1;
  return Width;
}
112+
113+
/// \returns SaSdst bit shift
114+
inline unsigned getSaSdstBitShift() {
  // Bit shift of the sa_sdst field within the depctr immediate.
  constexpr unsigned Shift = 0;
  return Shift;
}
115+
98116
} // end namespace anonymous
99117

100118
namespace llvm {
@@ -1501,6 +1519,42 @@ int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
15011519
STI);
15021520
}
15031521

1522+
// Decodes the vm_vsrc field from the depctr immediate \p Encoded by
// delegating to unpackBits with the vm_vsrc shift and width.
unsigned decodeFieldVmVsrc(unsigned Encoded) {
1523+
return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1524+
}
1525+
1526+
// Decodes the va_vdst field from the depctr immediate \p Encoded by
// delegating to unpackBits with the va_vdst shift and width.
unsigned decodeFieldVaVdst(unsigned Encoded) {
1527+
return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1528+
}
1529+
1530+
// Decodes the sa_sdst field from the depctr immediate \p Encoded by
// delegating to unpackBits with the sa_sdst shift and width.
unsigned decodeFieldSaSdst(unsigned Encoded) {
1531+
return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1532+
}
1533+
1534+
// Combines \p VmVsrc into the existing depctr immediate \p Encoded via
// packBits, using the vm_vsrc shift and width.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1535+
return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1536+
}
1537+
1538+
// Convenience overload: encodes \p VmVsrc on top of the base immediate
// 0xffff rather than a caller-supplied immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1539+
return encodeFieldVmVsrc(0xffff, VmVsrc);
1540+
}
1541+
1542+
// Combines \p VaVdst into the existing depctr immediate \p Encoded via
// packBits, using the va_vdst shift and width.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1543+
return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1544+
}
1545+
1546+
// Convenience overload: encodes \p VaVdst on top of the base immediate
// 0xffff rather than a caller-supplied immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst) {
1547+
return encodeFieldVaVdst(0xffff, VaVdst);
1548+
}
1549+
1550+
// Combines \p SaSdst into the existing depctr immediate \p Encoded via
// packBits, using the sa_sdst shift and width.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1551+
return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1552+
}
1553+
1554+
// Convenience overload: encodes \p SaSdst on top of the base immediate
// 0xffff rather than a caller-supplied immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst) {
1555+
return encodeFieldSaSdst(0xffff, SaSdst);
1556+
}
1557+
15041558
} // namespace DepCtr
15051559

15061560
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,33 @@ bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
977977
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
978978
bool &IsDefault, const MCSubtargetInfo &STI);
979979

980+
/// \returns Decoded VaVdst from given immediate \p Encoded.
981+
unsigned decodeFieldVaVdst(unsigned Encoded);
982+
983+
/// \returns Decoded VmVsrc from given immediate \p Encoded.
984+
unsigned decodeFieldVmVsrc(unsigned Encoded);
985+
986+
/// \returns Decoded SaSdst from given immediate \p Encoded.
987+
unsigned decodeFieldSaSdst(unsigned Encoded);
988+
989+
/// \returns \p VmVsrc as an encoded Depctr immediate.
990+
unsigned encodeFieldVmVsrc(unsigned VmVsrc);
991+
992+
/// \returns \p Encoded combined with encoded \p VmVsrc.
993+
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
994+
995+
/// \returns \p VaVdst as an encoded Depctr immediate.
996+
unsigned encodeFieldVaVdst(unsigned VaVdst);
997+
998+
/// \returns \p Encoded combined with encoded \p VaVdst.
999+
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1000+
1001+
/// \returns \p SaSdst as an encoded Depctr immediate.
1002+
unsigned encodeFieldSaSdst(unsigned SaSdst);
1003+
1004+
/// \returns \p Encoded combined with encoded \p SaSdst.
1005+
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1006+
9801007
} // namespace DepCtr
9811008

9821009
namespace Exp {

0 commit comments

Comments
 (0)