Skip to content

[AMDGPU][TTI] Add Target Hook for Instruction Uniformity (getInstructionUniformity) #137639

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/ADT/GenericUniformityImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Uniformity.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "uniformity"
Expand Down Expand Up @@ -406,6 +407,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {

void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
const CycleT *);
/// @brief Collect the uniformity of the operands of an instruction.
/// @param I instruction whose operands are inspected.
/// @return vector of InstructionUniformity values (not booleans), one per
/// inspected operand.
/// NOTE(review): the SSAContext specialization skips operands that are
/// neither instructions nor arguments, so entries need not line up with
/// operand indices -- confirm this is what callers expect.
llvm::SmallVector<InstructionUniformity>
getOperandUniformities(const Instruction &I) const;

protected:
/// \brief Value/block pair representing a single phi input.
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Uniformity.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
Expand Down Expand Up @@ -1916,6 +1917,14 @@ class TargetTransformInfo {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;

/// Targets can implement more complex patterns for computing the uniformity
/// of an instruction. Currently, uniformity analysis categorises
/// instructions with a fixed set of InstructionUniformity values: Default,
/// AlwaysUniform and NeverUniform.
///
/// \param I the instruction being queried.
/// \param OperandUniformities uniformity values computed for operands of
/// \p I.
/// \returns the target's answer, or std::nullopt when the target has no
/// opinion (the base implementation returns std::nullopt).
std::optional<InstructionUniformity> getInstructionUniformity(
const Instruction &I,
SmallVector<InstructionUniformity> OperandUniformities) const;

private:
std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
};
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1147,6 +1147,12 @@ class TargetTransformInfoImplBase {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}

/// Default hook for target-specific instruction uniformity.
///
/// The base implementation has no target knowledge about \p I, so it ignores
/// \p OperandUniformities and answers with an empty optional.
virtual std::optional<InstructionUniformity> getInstructionUniformity(
    const Instruction &I,
    SmallVector<InstructionUniformity> OperandUniformities) const {
  // An empty optional is the "no answer" result.
  return {};
}

protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1476,6 +1476,13 @@ void TargetTransformInfo::collectKernelLaunchBounds(
return TTIImpl->collectKernelLaunchBounds(F, LB);
}

/// Forward the uniformity query for \p I to the target implementation.
///
/// \param I the instruction being queried.
/// \param OperandUniformities uniformity values computed for operands of
/// \p I.
/// \returns whatever the target implementation returns; the base
/// implementation returns std::nullopt.
std::optional<InstructionUniformity>
TargetTransformInfo::getInstructionUniformity(
    const Instruction &I,
    SmallVector<InstructionUniformity> OperandUniformities) const {
  // The vector was already copied into this by-value parameter; move it into
  // the implementation's by-value parameter instead of copying it again.
  return TTIImpl->getInstructionUniformity(I, std::move(OperandUniformities));
}

TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
Expand Down
59 changes: 45 additions & 14 deletions llvm/lib/Analysis/UniformityAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,43 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
return markDivergent(cast<Value>(&Instr));
}

/// Report whether the use \p U observes a divergent value.
///
/// A use is divergent when the used value itself is divergent, or when the
/// value is defined by an instruction for which isTemporalDivergent() holds
/// with respect to the block containing the using instruction.
template <>
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
    const Use &U) const {
  const auto *Operand = U.get();
  if (isDivergent(Operand))
    return true;

  // Values that are not defined by an instruction have no temporal
  // divergence to check.
  const auto *Def = dyn_cast<Instruction>(Operand);
  if (!Def)
    return false;

  const auto *UserInst = cast<Instruction>(U.getUser());
  return isTemporalDivergent(*UserInst->getParent(), *Def);
}

/// Classify the operands of \p I as uniform or divergent.
///
/// Only operands that are instructions or function arguments contribute an
/// entry; all other operands are skipped, so the result can be shorter than
/// the operand list. Each entry is NeverUniform when the use is divergent
/// and AlwaysUniform otherwise.
template <>
llvm::SmallVector<InstructionUniformity>
llvm::GenericUniformityAnalysisImpl<SSAContext>::getOperandUniformities(
    const Instruction &I) const {
  SmallVector<InstructionUniformity> Result;
  for (const Use &Op : I.operands()) {
    // Skip anything that is neither an instruction nor an argument.
    if (!isa<Instruction, Argument>(Op.get()))
      continue;

    if (isDivergentUse(Op))
      Result.push_back(InstructionUniformity::NeverUniform);
    else
      Result.push_back(InstructionUniformity::AlwaysUniform);
  }
  return Result;
}

template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
for (auto &I : instructions(F)) {
if (TTI->isSourceOfDivergence(&I))
markDivergent(I);
else if (TTI->isAlwaysUniform(&I))
addUniformOverride(I);
}

for (auto &Arg : F.args()) {
if (TTI->isSourceOfDivergence(&Arg)) {
markDivergent(&Arg);
Expand All @@ -47,9 +77,23 @@ template <>
// Visit every user of V and decide, per user instruction, whether to mark it
// divergent or to record a uniform override for it.
// NOTE(review): this span comes from a rendered diff. The
// `if (const auto *UserInstr = ...) {` line right below the `for` looks like
// the removed pre-change line sitting next to its replacement, which leaves
// the braces unbalanced -- confirm against the real file.
void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers(
const Value *V) {
for (const auto *User : V->users()) {
if (const auto *UserInstr = dyn_cast<const Instruction>(User)) {
const auto *UserInstr = dyn_cast<const Instruction>(User);
// Users that are not instructions are ignored.
if (!UserInstr)
continue;

// Without target information every user instruction is marked divergent.
if (!TTI) {
markDivergent(*UserInstr);
continue;
}

// Ask the target, passing the uniformity computed for the user's operands.
auto Uniformity = TTI->getInstructionUniformity(
*UserInstr, getOperandUniformities(*UserInstr));
// No answer and Default are handled the same way as NeverUniform: the user
// is marked divergent. Only AlwaysUniform records a uniform override.
if (!Uniformity || *Uniformity == InstructionUniformity::Default)
markDivergent(*UserInstr); // fallback: conservative
else if (*Uniformity == InstructionUniformity::NeverUniform)
markDivergent(*UserInstr);
else if (*Uniformity == InstructionUniformity::AlwaysUniform)
addUniformOverride(*UserInstr);
}
}

Expand Down Expand Up @@ -88,19 +132,6 @@ void llvm::GenericUniformityAnalysisImpl<
}
}

/// Report whether the use \p U observes a divergent value.
///
/// A use is divergent when the used value itself is divergent, or when the
/// value is defined by an instruction for which isTemporalDivergent() holds
/// with respect to the block containing the using instruction.
template <>
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
    const Use &U) const {
  const auto *Operand = U.get();
  if (isDivergent(Operand))
    return true;

  // Values that are not defined by an instruction have no temporal
  // divergence to check.
  const auto *Def = dyn_cast<Instruction>(Operand);
  if (!Def)
    return false;

  const auto *UserInst = cast<Instruction>(U.getUser());
  return isTemporalDivergent(*UserInst->getParent(), *Def);
}

// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<SSAContext>;
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1422,3 +1422,21 @@ void GCNTTIImpl::collectKernelLaunchBounds(
LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
}

/// AMDGPU-specific uniformity for individual instructions.
///
/// Only llvm.amdgcn.permlane64 is handled: it is AlwaysUniform when at least
/// one entry of \p OperandUniformities is AlwaysUniform, and Default
/// otherwise. Every other instruction yields std::nullopt.
std::optional<InstructionUniformity> GCNTTIImpl::getInstructionUniformity(
    const Instruction &I,
    SmallVector<InstructionUniformity> OperandUniformities) const {
  const auto *Intrin = dyn_cast<IntrinsicInst>(&I);
  if (!Intrin)
    return std::nullopt;

  if (Intrin->getIntrinsicID() != Intrinsic::amdgcn_permlane64)
    return std::nullopt;

  const auto IsAlwaysUniform = [](InstructionUniformity U) {
    return U == InstructionUniformity::AlwaysUniform;
  };
  if (llvm::any_of(OperandUniformities, IsAlwaysUniform))
    return InstructionUniformity::AlwaysUniform;
  return InstructionUniformity::Default;
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,9 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
void collectKernelLaunchBounds(
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
/// Target override of the instruction-uniformity hook. \p I is the
/// instruction being queried and \p OperandUniformities carries uniformity
/// values for its operands.
std::optional<InstructionUniformity> getInstructionUniformity(
const Instruction &I,
SmallVector<InstructionUniformity> OperandUniformities) const override;
};

} // end namespace llvm
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: ALL VALUES UNIFORM
; permlane64 applied to an immediate constant; the expectation above is that
; the analysis reports every value in this kernel as uniform.
define amdgpu_kernel void @permlane64_constant(ptr addrspace(1) %out) {
%v = call i32 @llvm.amdgcn.permlane64(i32 7)
store i32 %v, ptr addrspace(1) %out
ret void
}

; CHECK: ALL VALUES UNIFORM
; permlane64 applied to the kernel argument %src; the expectation above is
; that the analysis reports every value in this kernel as uniform.
define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) {
%v = call i32 @llvm.amdgcn.permlane64(i32 %src)
store i32 %v, ptr addrspace(1) %out
ret void
}

; CHECK: DIVERGENT: %tid = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane64.i32(i32 %tid)
; permlane64 fed by the workitem id; both the id and the permlane64 result
; are expected to be reported as divergent. Pointers use the opaque `ptr`
; spelling, matching the other kernels in this test.
define amdgpu_kernel void @permlane64_nonuniform(ptr addrspace(1) %out) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane64(i32 %tid)
  %out_ptr = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  store i32 %v, ptr addrspace(1) %out_ptr
  ret void
}
Loading