Skip to content

[VP] Merge ExpandVP pass into PreISelIntrinsicLowering #101652

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions llvm/include/llvm/CodeGen/ExpandVectorPredication.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@

namespace llvm {

/// New-pass-manager entry point for vector predication expansion.
class ExpandVectorPredicationPass
: public PassInfoMixin<ExpandVectorPredicationPass> {
public:
/// Expands the VP intrinsics in \p F using the TargetIRAnalysis result from
/// \p AM. Reports all analyses preserved when nothing was expanded, and only
/// the CFG analyses otherwise.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
class TargetTransformInfo;
class VPIntrinsic;

/// Expand a single vector predication (llvm.vp.*) intrinsic call as requested
/// by \p TTI.
///
/// \param VPI The VP intrinsic call to legalize.
/// \param TTI Target information consulted for the legalization strategy.
/// \returns true if and only if \p VPI was removed/replaced by a
/// non-predicated expansion. Only transforming the explicit vector length
/// parameter of \p VPI does not make this return true.
bool expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
const TargetTransformInfo &TTI);

} // end namespace llvm

#endif // LLVM_CODEGEN_EXPANDVECTORPREDICATION_H
5 changes: 0 additions & 5 deletions llvm/include/llvm/CodeGen/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -513,11 +513,6 @@ namespace llvm {
// the corresponding function in a vector library (e.g., SVML, libmvec).
FunctionPass *createReplaceWithVeclibLegacyPass();

/// Creates the legacy FunctionPass that expands vector predication intrinsics
/// into unpredicated instructions with selects, or that merely folds the
/// explicit vector length into the predicate mask.
FunctionPass *createExpandVectorPredicationPass();

// Expands large div/rem instructions.
FunctionPass *createExpandLargeDivRemPass();

Expand Down
1 change: 0 additions & 1 deletion llvm/include/llvm/LinkAllPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ namespace {
(void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandLargeDivRemPass();
(void)llvm::createExpandMemCmpLegacyPass();
(void) llvm::createExpandVectorPredicationPass();
std::string buf;
llvm::raw_string_ostream os(buf);
(void) llvm::createPrintModulePass(os);
Expand Down
1 change: 0 additions & 1 deletion llvm/include/llvm/Passes/MachinePassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM))
FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass(TM))
FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
FUNCTION_PASS("expand-reductions", ExpandReductionsPass())
FUNCTION_PASS("expandvp", ExpandVectorPredicationPass())
FUNCTION_PASS("gc-lowering", GCLoweringPass())
FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass(TM))
FUNCTION_PASS("interleaved-access", InterleavedAccessPass(TM))
Expand Down
138 changes: 35 additions & 103 deletions llvm/lib/CodeGen/ExpandVectorPredication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for vector predication intrinsics, allowing
// This file implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//
Expand All @@ -16,16 +16,13 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -137,7 +134,6 @@ namespace {

// Expansion pass state at function scope.
struct CachingVPExpander {
Function &F;
const TargetTransformInfo &TTI;

/// \returns A (fixed length) vector with ascending integer indices
Expand Down Expand Up @@ -207,10 +203,10 @@ struct CachingVPExpander {
bool UsingTTIOverrides;

public:
CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
: F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
CachingVPExpander(const TargetTransformInfo &TTI)
: TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}

bool expandVectorPredication();
bool expandVectorPredication(VPIntrinsic &VPI);
};

//// CachingVPExpander {
Expand Down Expand Up @@ -571,7 +567,7 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
VPIntrinsic &VPI) {
assert(VPI.canIgnoreVectorLengthParam());

const auto &DL = F.getDataLayout();
const auto &DL = VPI.getDataLayout();

Value *MaskParam = VPI.getMaskParam();
Value *PtrParam = VPI.getMemoryPointerParam();
Expand Down Expand Up @@ -775,15 +771,6 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {

//// } CachingVPExpander

/// One unit of work for the function-wide expansion: a VP intrinsic together
/// with the legalization strategy that still has to be applied to it.
struct TransformJob {
VPIntrinsic *PI;
TargetTransformInfo::VPLegalization Strategy;
TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
: PI(PI), Strategy(InitStrat) {}

/// True once the remaining strategy requires no further transformation.
bool isDone() const { return Strategy.shouldDoNothing(); }
};

void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
// Operations with speculatable lanes do not strictly need predication.
if (maySpeculateLanes(VPI)) {
Expand Down Expand Up @@ -821,98 +808,43 @@ CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
}

/// Expand llvm.vp.* intrinsics as requested by \p TTI.
bool CachingVPExpander::expandVectorPredication() {
SmallVector<TransformJob, 16> Worklist;

// Collect all VPIntrinsics that need expansion and determine their expansion
// strategy.
for (auto &I : instructions(F)) {
auto *VPI = dyn_cast<VPIntrinsic>(&I);
if (!VPI)
continue;
auto VPStrat = getVPLegalizationStrategy(*VPI);
sanitizeStrategy(*VPI, VPStrat);
if (!VPStrat.shouldDoNothing())
Worklist.emplace_back(VPI, VPStrat);
}
if (Worklist.empty())
return false;
bool CachingVPExpander::expandVectorPredication(VPIntrinsic &VPI) {
auto Strategy = getVPLegalizationStrategy(VPI);
sanitizeStrategy(VPI, Strategy);

// Transform all VPIntrinsics on the worklist.
LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
<< " instructions ::::\n");
for (TransformJob Job : Worklist) {
// Transform the EVL parameter.
switch (Job.Strategy.EVLParamStrategy) {
case VPLegalization::Legal:
break;
case VPLegalization::Discard:
discardEVLParameter(*Job.PI);
break;
case VPLegalization::Convert:
if (foldEVLIntoMask(*Job.PI))
++NumFoldedVL;
break;
}
Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
// Transform the EVL parameter.
switch (Strategy.EVLParamStrategy) {
case VPLegalization::Legal:
break;
case VPLegalization::Discard:
discardEVLParameter(VPI);
break;
case VPLegalization::Convert:
if (foldEVLIntoMask(VPI))
++NumFoldedVL;
break;
}

// Replace with a non-predicated operation.
switch (Job.Strategy.OpStrategy) {
case VPLegalization::Legal:
break;
case VPLegalization::Discard:
llvm_unreachable("Invalid strategy for operators.");
case VPLegalization::Convert:
expandPredication(*Job.PI);
// Replace with a non-predicated operation.
switch (Strategy.OpStrategy) {
case VPLegalization::Legal:
break;
case VPLegalization::Discard:
llvm_unreachable("Invalid strategy for operators.");
case VPLegalization::Convert:
if (Value *V = expandPredication(VPI); V != &VPI) {
++NumLoweredVPOps;
break;
// Return true if and only if the intrinsic was actually removed.
return true;
}
Job.Strategy.OpStrategy = VPLegalization::Legal;

assert(Job.isDone() && "incomplete transformation");
break;
}

return true;
return false;
}
class ExpandVectorPredication : public FunctionPass {
public:
static char ID;
ExpandVectorPredication() : FunctionPass(ID) {
initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
}

bool runOnFunction(Function &F) override {
const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
CachingVPExpander VPExpander(F, *TTI);
return VPExpander.expandVectorPredication();
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
}
};
} // namespace

char ExpandVectorPredication::ID;
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
"Expand vector predication intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
"Expand vector predication intrinsics", false, false)

FunctionPass *llvm::createExpandVectorPredicationPass() {
return new ExpandVectorPredication();
}

PreservedAnalyses
ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
CachingVPExpander VPExpander(F, TTI);
if (!VPExpander.expandVectorPredication())
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
/// Public entry point: builds a fresh expander around \p TTI and lets it
/// legalize \p VPI, forwarding the expander's result unchanged.
bool llvm::expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
                                            const TargetTransformInfo &TTI) {
  CachingVPExpander Expander(TTI);
  const bool Replaced = Expander.expandVectorPredication(VPI);
  return Replaced;
}
11 changes: 11 additions & 0 deletions llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
Expand Down Expand Up @@ -351,6 +352,16 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
return Changed;
});
break;
// Each BEGIN_REGISTER_VP_INTRINSIC entry in VPIntrinsics.def expands to a
// case label here, so all of those intrinsic IDs share the handler below.
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID:
#include "llvm/IR/VPIntrinsics.def"
Changed |= forEachCall(F, [&](CallInst *CI) {
// Look up TTI for the function that contains this particular call.
Function *Parent = CI->getParent()->getParent();
const TargetTransformInfo &TTI = LookupTTI(*Parent);
auto *VPI = cast<VPIntrinsic>(CI);
// The callback result is whatever the expansion reports for this call.
return expandVectorPredicationIntrinsic(*VPI, TTI);
});
break;
case Intrinsic::objc_autorelease:
Changed |= lowerObjCCall(F, "objc_autorelease");
break;
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/CodeGen/TargetPassConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -865,11 +865,6 @@ void TargetPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOptLevel::None && !DisablePartialLibcallInlining)
addPass(createPartiallyInlineLibCallsPass());

// Expand vector predication intrinsics into standard IR instructions.
// This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction
// passes since it emits those kinds of intrinsics.
addPass(createExpandVectorPredicationPass());

// Instrument function entry after all inlining.
addPass(createPostInlineEntryExitInstrumenterPass());

Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AArch64/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
Expand Down
5 changes: 0 additions & 5 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Expand Atomic instructions
; GCN-O0-NEXT: Remove unreachable blocks from the CFG
; GCN-O0-NEXT: Expand vector predication intrinsics
; GCN-O0-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O0-NEXT: Expand reduction intrinsics
Expand Down Expand Up @@ -222,7 +221,6 @@
; GCN-O1-NEXT: Constant Hoisting
; GCN-O1-NEXT: Replace intrinsics with calls to vector library
; GCN-O1-NEXT: Partially inline calls to library functions
; GCN-O1-NEXT: Expand vector predication intrinsics
; GCN-O1-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O1-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O1-NEXT: Expand reduction intrinsics
Expand Down Expand Up @@ -508,7 +506,6 @@
; GCN-O1-OPTS-NEXT: Constant Hoisting
; GCN-O1-OPTS-NEXT: Replace intrinsics with calls to vector library
; GCN-O1-OPTS-NEXT: Partially inline calls to library functions
; GCN-O1-OPTS-NEXT: Expand vector predication intrinsics
; GCN-O1-OPTS-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O1-OPTS-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O1-OPTS-NEXT: Expand reduction intrinsics
Expand Down Expand Up @@ -813,7 +810,6 @@
; GCN-O2-NEXT: Constant Hoisting
; GCN-O2-NEXT: Replace intrinsics with calls to vector library
; GCN-O2-NEXT: Partially inline calls to library functions
; GCN-O2-NEXT: Expand vector predication intrinsics
; GCN-O2-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O2-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O2-NEXT: Expand reduction intrinsics
Expand Down Expand Up @@ -1126,7 +1122,6 @@
; GCN-O3-NEXT: Constant Hoisting
; GCN-O3-NEXT: Replace intrinsics with calls to vector library
; GCN-O3-NEXT: Partially inline calls to library functions
; GCN-O3-NEXT: Expand vector predication intrinsics
; GCN-O3-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O3-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O3-NEXT: Expand reduction intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/ARM/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/LoongArch/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/LoongArch/opt-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
; LAXX-NEXT: Constant Hoisting
; LAXX-NEXT: Replace intrinsics with calls to vector library
; LAXX-NEXT: Partially inline calls to library functions
; LAXX-NEXT: Expand vector predication intrinsics
; LAXX-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; LAXX-NEXT: Scalarize Masked Memory Intrinsics
; LAXX-NEXT: Expand reduction intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/PowerPC/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/PowerPC/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
Expand Down
Loading
Loading