Skip to content

Commit fa92d51

Browse files
authored
[VP] Merge ExpandVP pass into PreISelIntrinsicLowering (#101652)
Similar to #97727; avoid an extra pass over the entire IR by performing the lowering as part of the pre-isel-intrinsic-lowering pass.
1 parent c2f92fa commit fa92d51

25 files changed, with 65 additions (+65) and 149 deletions (-149).

llvm/include/llvm/CodeGen/ExpandVectorPredication.h

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,14 @@
1313

1414
namespace llvm {
1515

16-
class ExpandVectorPredicationPass
17-
: public PassInfoMixin<ExpandVectorPredicationPass> {
18-
public:
19-
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
20-
};
16+
class TargetTransformInfo;
17+
class VPIntrinsic;
18+
19+
/// Expand a vector predication intrinsic. Returns true if the intrinsic was
20+
/// removed/replaced.
21+
bool expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
22+
const TargetTransformInfo &TTI);
23+
2124
} // end namespace llvm
2225

2326
#endif // LLVM_CODEGEN_EXPANDVECTORPREDICATION_H

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -513,11 +513,6 @@ namespace llvm {
513513
// the corresponding function in a vector library (e.g., SVML, libmvec).
514514
FunctionPass *createReplaceWithVeclibLegacyPass();
515515

516-
/// This pass expands the vector predication intrinsics into unpredicated
517-
/// instructions with selects or just the explicit vector length into the
518-
/// predicate mask.
519-
FunctionPass *createExpandVectorPredicationPass();
520-
521516
// Expands large div/rem instructions.
522517
FunctionPass *createExpandLargeDivRemPass();
523518

llvm/include/llvm/LinkAllPasses.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,6 @@ namespace {
119119
(void) llvm::createMergeICmpsLegacyPass();
120120
(void) llvm::createExpandLargeDivRemPass();
121121
(void)llvm::createExpandMemCmpLegacyPass();
122-
(void) llvm::createExpandVectorPredicationPass();
123122
std::string buf;
124123
llvm::raw_string_ostream os(buf);
125124
(void) llvm::createPrintModulePass(os);

llvm/include/llvm/Passes/MachinePassRegistry.def

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,6 @@ FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM))
5353
FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass(TM))
5454
FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
5555
FUNCTION_PASS("expand-reductions", ExpandReductionsPass())
56-
FUNCTION_PASS("expandvp", ExpandVectorPredicationPass())
5756
FUNCTION_PASS("gc-lowering", GCLoweringPass())
5857
FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass(TM))
5958
FUNCTION_PASS("interleaved-access", InterleavedAccessPass(TM))

llvm/lib/CodeGen/ExpandVectorPredication.cpp

Lines changed: 35 additions & 103 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This pass implements IR expansion for vector predication intrinsics, allowing
9+
// This file implements IR expansion for vector predication intrinsics, allowing
1010
// targets to enable vector predication until just before codegen.
1111
//
1212
//===----------------------------------------------------------------------===//
@@ -16,16 +16,13 @@
1616
#include "llvm/Analysis/TargetTransformInfo.h"
1717
#include "llvm/Analysis/ValueTracking.h"
1818
#include "llvm/Analysis/VectorUtils.h"
19-
#include "llvm/CodeGen/Passes.h"
2019
#include "llvm/IR/Constants.h"
2120
#include "llvm/IR/Function.h"
2221
#include "llvm/IR/IRBuilder.h"
2322
#include "llvm/IR/InstIterator.h"
2423
#include "llvm/IR/Instructions.h"
2524
#include "llvm/IR/IntrinsicInst.h"
2625
#include "llvm/IR/Intrinsics.h"
27-
#include "llvm/InitializePasses.h"
28-
#include "llvm/Pass.h"
2926
#include "llvm/Support/CommandLine.h"
3027
#include "llvm/Support/Compiler.h"
3128
#include "llvm/Support/Debug.h"
@@ -137,7 +134,6 @@ namespace {
137134

138135
// Expansion pass state at function scope.
139136
struct CachingVPExpander {
140-
Function &F;
141137
const TargetTransformInfo &TTI;
142138

143139
/// \returns A (fixed length) vector with ascending integer indices
@@ -207,10 +203,10 @@ struct CachingVPExpander {
207203
bool UsingTTIOverrides;
208204

209205
public:
210-
CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
211-
: F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
206+
CachingVPExpander(const TargetTransformInfo &TTI)
207+
: TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
212208

213-
bool expandVectorPredication();
209+
bool expandVectorPredication(VPIntrinsic &VPI);
214210
};
215211

216212
//// CachingVPExpander {
@@ -571,7 +567,7 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
571567
VPIntrinsic &VPI) {
572568
assert(VPI.canIgnoreVectorLengthParam());
573569

574-
const auto &DL = F.getDataLayout();
570+
const auto &DL = VPI.getDataLayout();
575571

576572
Value *MaskParam = VPI.getMaskParam();
577573
Value *PtrParam = VPI.getMemoryPointerParam();
@@ -775,15 +771,6 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
775771

776772
//// } CachingVPExpander
777773

778-
struct TransformJob {
779-
VPIntrinsic *PI;
780-
TargetTransformInfo::VPLegalization Strategy;
781-
TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
782-
: PI(PI), Strategy(InitStrat) {}
783-
784-
bool isDone() const { return Strategy.shouldDoNothing(); }
785-
};
786-
787774
void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
788775
// Operations with speculatable lanes do not strictly need predication.
789776
if (maySpeculateLanes(VPI)) {
@@ -821,98 +808,43 @@ CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
821808
}
822809

823810
/// Expand llvm.vp.* intrinsics as requested by \p TTI.
824-
bool CachingVPExpander::expandVectorPredication() {
825-
SmallVector<TransformJob, 16> Worklist;
826-
827-
// Collect all VPIntrinsics that need expansion and determine their expansion
828-
// strategy.
829-
for (auto &I : instructions(F)) {
830-
auto *VPI = dyn_cast<VPIntrinsic>(&I);
831-
if (!VPI)
832-
continue;
833-
auto VPStrat = getVPLegalizationStrategy(*VPI);
834-
sanitizeStrategy(*VPI, VPStrat);
835-
if (!VPStrat.shouldDoNothing())
836-
Worklist.emplace_back(VPI, VPStrat);
837-
}
838-
if (Worklist.empty())
839-
return false;
811+
bool CachingVPExpander::expandVectorPredication(VPIntrinsic &VPI) {
812+
auto Strategy = getVPLegalizationStrategy(VPI);
813+
sanitizeStrategy(VPI, Strategy);
840814

841-
// Transform all VPIntrinsics on the worklist.
842-
LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
843-
<< " instructions ::::\n");
844-
for (TransformJob Job : Worklist) {
845-
// Transform the EVL parameter.
846-
switch (Job.Strategy.EVLParamStrategy) {
847-
case VPLegalization::Legal:
848-
break;
849-
case VPLegalization::Discard:
850-
discardEVLParameter(*Job.PI);
851-
break;
852-
case VPLegalization::Convert:
853-
if (foldEVLIntoMask(*Job.PI))
854-
++NumFoldedVL;
855-
break;
856-
}
857-
Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
815+
// Transform the EVL parameter.
816+
switch (Strategy.EVLParamStrategy) {
817+
case VPLegalization::Legal:
818+
break;
819+
case VPLegalization::Discard:
820+
discardEVLParameter(VPI);
821+
break;
822+
case VPLegalization::Convert:
823+
if (foldEVLIntoMask(VPI))
824+
++NumFoldedVL;
825+
break;
826+
}
858827

859-
// Replace with a non-predicated operation.
860-
switch (Job.Strategy.OpStrategy) {
861-
case VPLegalization::Legal:
862-
break;
863-
case VPLegalization::Discard:
864-
llvm_unreachable("Invalid strategy for operators.");
865-
case VPLegalization::Convert:
866-
expandPredication(*Job.PI);
828+
// Replace with a non-predicated operation.
829+
switch (Strategy.OpStrategy) {
830+
case VPLegalization::Legal:
831+
break;
832+
case VPLegalization::Discard:
833+
llvm_unreachable("Invalid strategy for operators.");
834+
case VPLegalization::Convert:
835+
if (Value *V = expandPredication(VPI); V != &VPI) {
867836
++NumLoweredVPOps;
868-
break;
837+
// Return true if and only if the intrinsic was actually removed.
838+
return true;
869839
}
870-
Job.Strategy.OpStrategy = VPLegalization::Legal;
871-
872-
assert(Job.isDone() && "incomplete transformation");
840+
break;
873841
}
874842

875-
return true;
843+
return false;
876844
}
877-
class ExpandVectorPredication : public FunctionPass {
878-
public:
879-
static char ID;
880-
ExpandVectorPredication() : FunctionPass(ID) {
881-
initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
882-
}
883-
884-
bool runOnFunction(Function &F) override {
885-
const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
886-
CachingVPExpander VPExpander(F, *TTI);
887-
return VPExpander.expandVectorPredication();
888-
}
889-
890-
void getAnalysisUsage(AnalysisUsage &AU) const override {
891-
AU.addRequired<TargetTransformInfoWrapperPass>();
892-
AU.setPreservesCFG();
893-
}
894-
};
895845
} // namespace
896846

897-
char ExpandVectorPredication::ID;
898-
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
899-
"Expand vector predication intrinsics", false, false)
900-
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
901-
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
902-
INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
903-
"Expand vector predication intrinsics", false, false)
904-
905-
FunctionPass *llvm::createExpandVectorPredicationPass() {
906-
return new ExpandVectorPredication();
907-
}
908-
909-
PreservedAnalyses
910-
ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
911-
const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
912-
CachingVPExpander VPExpander(F, TTI);
913-
if (!VPExpander.expandVectorPredication())
914-
return PreservedAnalyses::all();
915-
PreservedAnalyses PA;
916-
PA.preserveSet<CFGAnalyses>();
917-
return PA;
847+
bool llvm::expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
848+
const TargetTransformInfo &TTI) {
849+
return CachingVPExpander(TTI).expandVectorPredication(VPI);
918850
}

llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/Analysis/ObjCARCUtil.h"
1717
#include "llvm/Analysis/TargetLibraryInfo.h"
1818
#include "llvm/Analysis/TargetTransformInfo.h"
19+
#include "llvm/CodeGen/ExpandVectorPredication.h"
1920
#include "llvm/CodeGen/Passes.h"
2021
#include "llvm/CodeGen/TargetLowering.h"
2122
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -351,6 +352,16 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
351352
return Changed;
352353
});
353354
break;
355+
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
356+
case Intrinsic::VPID:
357+
#include "llvm/IR/VPIntrinsics.def"
358+
Changed |= forEachCall(F, [&](CallInst *CI) {
359+
Function *Parent = CI->getParent()->getParent();
360+
const TargetTransformInfo &TTI = LookupTTI(*Parent);
361+
auto *VPI = cast<VPIntrinsic>(CI);
362+
return expandVectorPredicationIntrinsic(*VPI, TTI);
363+
});
364+
break;
354365
case Intrinsic::objc_autorelease:
355366
Changed |= lowerObjCCall(F, "objc_autorelease");
356367
break;

llvm/lib/CodeGen/TargetPassConfig.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -865,11 +865,6 @@ void TargetPassConfig::addIRPasses() {
865865
if (getOptLevel() != CodeGenOptLevel::None && !DisablePartialLibcallInlining)
866866
addPass(createPartiallyInlineLibCallsPass());
867867

868-
// Expand vector predication intrinsics into standard IR instructions.
869-
// This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction
870-
// passes since it emits those kinds of intrinsics.
871-
addPass(createExpandVectorPredicationPass());
872-
873868
// Instrument function entry after all inlining.
874869
addPass(createPostInlineEntryExitInstrumenterPass());
875870

llvm/test/CodeGen/AArch64/O0-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
; CHECK-NEXT: Lower Garbage Collection Instructions
2323
; CHECK-NEXT: Shadow Stack GC Lowering
2424
; CHECK-NEXT: Remove unreachable blocks from the CFG
25-
; CHECK-NEXT: Expand vector predication intrinsics
2625
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
2726
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
2827
; CHECK-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/AArch64/O3-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,6 @@
6060
; CHECK-NEXT: Constant Hoisting
6161
; CHECK-NEXT: Replace intrinsics with calls to vector library
6262
; CHECK-NEXT: Partially inline calls to library functions
63-
; CHECK-NEXT: Expand vector predication intrinsics
6463
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
6564
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
6665
; CHECK-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,6 @@
4343
; GCN-O0-NEXT: FunctionPass Manager
4444
; GCN-O0-NEXT: Expand Atomic instructions
4545
; GCN-O0-NEXT: Remove unreachable blocks from the CFG
46-
; GCN-O0-NEXT: Expand vector predication intrinsics
4746
; GCN-O0-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
4847
; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics
4948
; GCN-O0-NEXT: Expand reduction intrinsics
@@ -222,7 +221,6 @@
222221
; GCN-O1-NEXT: Constant Hoisting
223222
; GCN-O1-NEXT: Replace intrinsics with calls to vector library
224223
; GCN-O1-NEXT: Partially inline calls to library functions
225-
; GCN-O1-NEXT: Expand vector predication intrinsics
226224
; GCN-O1-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
227225
; GCN-O1-NEXT: Scalarize Masked Memory Intrinsics
228226
; GCN-O1-NEXT: Expand reduction intrinsics
@@ -508,7 +506,6 @@
508506
; GCN-O1-OPTS-NEXT: Constant Hoisting
509507
; GCN-O1-OPTS-NEXT: Replace intrinsics with calls to vector library
510508
; GCN-O1-OPTS-NEXT: Partially inline calls to library functions
511-
; GCN-O1-OPTS-NEXT: Expand vector predication intrinsics
512509
; GCN-O1-OPTS-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
513510
; GCN-O1-OPTS-NEXT: Scalarize Masked Memory Intrinsics
514511
; GCN-O1-OPTS-NEXT: Expand reduction intrinsics
@@ -813,7 +810,6 @@
813810
; GCN-O2-NEXT: Constant Hoisting
814811
; GCN-O2-NEXT: Replace intrinsics with calls to vector library
815812
; GCN-O2-NEXT: Partially inline calls to library functions
816-
; GCN-O2-NEXT: Expand vector predication intrinsics
817813
; GCN-O2-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
818814
; GCN-O2-NEXT: Scalarize Masked Memory Intrinsics
819815
; GCN-O2-NEXT: Expand reduction intrinsics
@@ -1126,7 +1122,6 @@
11261122
; GCN-O3-NEXT: Constant Hoisting
11271123
; GCN-O3-NEXT: Replace intrinsics with calls to vector library
11281124
; GCN-O3-NEXT: Partially inline calls to library functions
1129-
; GCN-O3-NEXT: Expand vector predication intrinsics
11301125
; GCN-O3-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
11311126
; GCN-O3-NEXT: Scalarize Masked Memory Intrinsics
11321127
; GCN-O3-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/ARM/O3-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@
3838
; CHECK-NEXT: Constant Hoisting
3939
; CHECK-NEXT: Replace intrinsics with calls to vector library
4040
; CHECK-NEXT: Partially inline calls to library functions
41-
; CHECK-NEXT: Expand vector predication intrinsics
4241
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
4342
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
4443
; CHECK-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/LoongArch/O0-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@
2626
; CHECK-NEXT: Lower Garbage Collection Instructions
2727
; CHECK-NEXT: Shadow Stack GC Lowering
2828
; CHECK-NEXT: Remove unreachable blocks from the CFG
29-
; CHECK-NEXT: Expand vector predication intrinsics
3029
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
3130
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
3231
; CHECK-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/LoongArch/opt-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,6 @@
6161
; LAXX-NEXT: Constant Hoisting
6262
; LAXX-NEXT: Replace intrinsics with calls to vector library
6363
; LAXX-NEXT: Partially inline calls to library functions
64-
; LAXX-NEXT: Expand vector predication intrinsics
6564
; LAXX-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
6665
; LAXX-NEXT: Scalarize Masked Memory Intrinsics
6766
; LAXX-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/PowerPC/O0-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
; CHECK-NEXT: Lower Garbage Collection Instructions
2626
; CHECK-NEXT: Shadow Stack GC Lowering
2727
; CHECK-NEXT: Remove unreachable blocks from the CFG
28-
; CHECK-NEXT: Expand vector predication intrinsics
2928
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
3029
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
3130
; CHECK-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/PowerPC/O3-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,6 @@
6262
; CHECK-NEXT: Constant Hoisting
6363
; CHECK-NEXT: Replace intrinsics with calls to vector library
6464
; CHECK-NEXT: Partially inline calls to library functions
65-
; CHECK-NEXT: Expand vector predication intrinsics
6665
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
6766
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
6867
; CHECK-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/RISCV/O0-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@
2626
; CHECK-NEXT: Lower Garbage Collection Instructions
2727
; CHECK-NEXT: Shadow Stack GC Lowering
2828
; CHECK-NEXT: Remove unreachable blocks from the CFG
29-
; CHECK-NEXT: Expand vector predication intrinsics
3029
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
3130
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
3231
; CHECK-NEXT: Expand reduction intrinsics

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,6 @@
6262
; CHECK-NEXT: Constant Hoisting
6363
; CHECK-NEXT: Replace intrinsics with calls to vector library
6464
; CHECK-NEXT: Partially inline calls to library functions
65-
; CHECK-NEXT: Expand vector predication intrinsics
6665
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
6766
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
6867
; CHECK-NEXT: Expand reduction intrinsics

0 commit comments

Comments (0)