Skip to content

Commit e854c38

Browse files
authored
[VPlan] Manage noalias/alias_scope metadata in VPlan. (#136450)
Use VPIRMetadata added in #135272 to also manage no-alias metadata added by versioning. Note that this means we have to build the no-alias metadata up-front once. If it is not used, it will be discarded automatically. This also fixes a case where incorrect metadata was added to wide loads/stores that got converted from an interleave group. Compile-time impact is neutral: https://llvm-compile-time-tracker.com/compare.php?from=38bf1af41c5425a552a53feb13c71d82873f1c18&to=2fd7844cfdf5ec0f1c2ce0b9b3ae0763245b6922&stat=instructions:u
1 parent c255a31 commit e854c38

File tree

11 files changed

+104
-114
lines changed

11 files changed

+104
-114
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ class LoopVectorizationLegality;
3636
class LoopVectorizationCostModel;
3737
class PredicatedScalarEvolution;
3838
class LoopVectorizeHints;
39+
class LoopVersioning;
3940
class OptimizationRemarkEmitter;
4041
class TargetTransformInfo;
4142
class TargetLibraryInfo;
@@ -524,7 +525,7 @@ class LoopVectorizationPlanner {
524525
/// returned VPlan is valid for. If no VPlan can be built for the input range,
525526
/// set the largest included VF to the maximum VF for which no plan could be
526527
/// built.
527-
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
528+
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);
528529

529530
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
530531
/// according to the information gathered by Legal when it checked if it is

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7838,24 +7838,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78387838
if (VectorizingEpilogue)
78397839
VPlanTransforms::removeDeadRecipes(BestVPlan);
78407840

7841-
// Only use noalias metadata when using memory checks guaranteeing no overlap
7842-
// across all iterations.
7843-
const LoopAccessInfo *LAI = Legal->getLAI();
7844-
std::unique_ptr<LoopVersioning> LVer = nullptr;
7845-
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
7846-
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
7847-
7848-
// We currently don't use LoopVersioning for the actual loop cloning but we
7849-
// still use it to add the noalias metadata.
7850-
// TODO: Find a better way to re-use LoopVersioning functionality to add
7851-
// metadata.
7852-
LVer = std::make_unique<LoopVersioning>(
7853-
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
7854-
PSE.getSE());
7855-
State.LVer = &*LVer;
7856-
State.LVer->prepareNoAliasMetadata();
7857-
}
7858-
78597841
ILV.printDebugTracesAtStart();
78607842

78617843
//===------------------------------------------------===//
@@ -8468,11 +8450,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
84688450
}
84698451
if (LoadInst *Load = dyn_cast<LoadInst>(I))
84708452
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
8471-
I->getDebugLoc());
8453+
VPIRMetadata(*Load, LVer), I->getDebugLoc());
84728454

84738455
StoreInst *Store = cast<StoreInst>(I);
84748456
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
8475-
Reverse, I->getDebugLoc());
8457+
Reverse, VPIRMetadata(*Store, LVer),
8458+
I->getDebugLoc());
84768459
}
84778460

84788461
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
@@ -8845,7 +8828,8 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
88458828
assert((Range.Start.isScalar() || !IsUniform || !IsPredicated ||
88468829
(Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
88478830
"Should not predicate a uniform recipe");
8848-
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask);
8831+
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask,
8832+
VPIRMetadata(*I, LVer));
88498833
return Recipe;
88508834
}
88518835

@@ -9092,10 +9076,20 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
90929076
ElementCount MaxVF) {
90939077
assert(OrigLoop->isInnermost() && "Inner loop expected.");
90949078

9079+
const LoopAccessInfo *LAI = Legal->getLAI();
9080+
LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
9081+
OrigLoop, LI, DT, PSE.getSE());
9082+
if (!LAI->getRuntimePointerChecking()->getChecks().empty() &&
9083+
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
9084+
// Only use noalias metadata when using memory checks guaranteeing no
9085+
// overlap across all iterations.
9086+
LVer.prepareNoAliasMetadata();
9087+
}
9088+
90959089
auto MaxVFTimes2 = MaxVF * 2;
90969090
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
90979091
VFRange SubRange = {VF, MaxVFTimes2};
9098-
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) {
9092+
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, &LVer)) {
90999093
bool HasScalarVF = Plan->hasScalarVFOnly();
91009094
// Now optimize the initial VPlan.
91019095
if (!HasScalarVF)
@@ -9357,7 +9351,8 @@ static void addExitUsersForFirstOrderRecurrences(
93579351
}
93589352

93599353
VPlanPtr
9360-
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9354+
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
9355+
LoopVersioning *LVer) {
93619356

93629357
using namespace llvm::VPlanPatternMatch;
93639358
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -9413,7 +9408,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94139408
}
94149409

94159410
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9416-
Builder);
9411+
Builder, LVer);
94179412

94189413
// ---------------------------------------------------------------------------
94199414
// Pre-construction: record ingredients whose recipes we'll need to further
@@ -9520,7 +9515,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95209515
// Only create recipe for the final invariant store of the reduction.
95219516
if (Legal->isInvariantStoreOfReduction(SI)) {
95229517
auto *Recipe =
9523-
new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */);
9518+
new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */,
9519+
nullptr /*Mask*/, VPIRMetadata(*SI, LVer));
95249520
Recipe->insertBefore(*MiddleVPBB, MBIP);
95259521
}
95269522
R.eraseFromParent();
@@ -9702,7 +9698,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
97029698
// Collect mapping of IR header phis to header phi recipes, to be used in
97039699
// addScalarResumePhis.
97049700
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9705-
Builder);
9701+
Builder, nullptr /*LVer*/);
97069702
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
97079703
if (isa<VPCanonicalIVPHIRecipe>(&R))
97089704
continue;

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ class VPRecipeBuilder {
9090
/// A mapping of partial reduction exit instructions to their scaling factor.
9191
DenseMap<const Instruction *, unsigned> ScaledReductionMap;
9292

93+
/// Loop versioning instance for getting noalias metadata guaranteed by
94+
/// runtime checks.
95+
LoopVersioning *LVer;
96+
9397
/// Check if \p I can be widened at the start of \p Range and possibly
9498
/// decrease the range such that the returned value holds for the entire \p
9599
/// Range. The function should not be called for memory instructions or calls.
@@ -155,9 +159,10 @@ class VPRecipeBuilder {
155159
const TargetTransformInfo *TTI,
156160
LoopVectorizationLegality *Legal,
157161
LoopVectorizationCostModel &CM,
158-
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
162+
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
163+
LoopVersioning *LVer)
159164
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
160-
CM(CM), PSE(PSE), Builder(Builder) {}
165+
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
161166

162167
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
163168
auto It = ScaledReductionMap.find(ExitInst);

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
220220
IRBuilderBase &Builder, VPlan *Plan,
221221
Loop *CurrentParentLoop, Type *CanonicalIVTy)
222222
: TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan),
223-
CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
224-
TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
223+
CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy),
224+
VPDT(*Plan) {}
225225

226226
Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
227227
if (Def->isLiveIn())
@@ -350,14 +350,6 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
350350
return VectorValue;
351351
}
352352

353-
void VPTransformState::addNewMetadata(Instruction *To,
354-
const Instruction *Orig) {
355-
// If the loop was versioned with memchecks, add the corresponding no-alias
356-
// metadata.
357-
if (LVer && isa<LoadInst, StoreInst>(Orig))
358-
LVer->annotateInstWithNoAlias(To, Orig);
359-
}
360-
361353
void VPTransformState::setDebugLocFrom(DebugLoc DL) {
362354
const DILocation *DIL = DL;
363355
// When a FSDiscriminator is enabled, we don't need to add the multiply

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ class VPReplicateRecipe;
6565
class VPlanSlp;
6666
class Value;
6767
class LoopVectorizationCostModel;
68+
class LoopVersioning;
6869

6970
struct VPCostContext;
7071

@@ -1236,11 +1237,20 @@ struct VPIRPhi : public VPIRInstruction {
12361237
class VPIRMetadata {
12371238
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
12381239

1239-
protected:
1240+
public:
12401241
VPIRMetadata() {}
1242+
1243+
/// Adds metadata that can be preserved from the original instruction
1244+
/// \p I.
12411245
VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }
12421246

1243-
public:
1247+
/// Adds metadata that can be preserved from the original instruction
1248+
/// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
1249+
VPIRMetadata(Instruction &I, LoopVersioning *LVer);
1250+
1251+
/// Copy constructor for cloning.
1252+
VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
1253+
12441254
/// Add all metadata to \p I.
12451255
void applyMetadata(Instruction &I) const;
12461256
};
@@ -2511,7 +2521,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
25112521
/// copies of the original scalar type, one per lane, instead of producing a
25122522
/// single copy of widened type for all lanes. If the instruction is known to be
25132523
/// uniform only one copy, per lane zero, will be generated.
2514-
class VPReplicateRecipe : public VPRecipeWithIRFlags {
2524+
class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
25152525
/// Indicator if only a single replica per lane is needed.
25162526
bool IsUniform;
25172527

@@ -2520,9 +2530,10 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
25202530

25212531
public:
25222532
VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
2523-
bool IsUniform, VPValue *Mask = nullptr)
2533+
bool IsUniform, VPValue *Mask = nullptr,
2534+
VPIRMetadata Metadata = {})
25242535
: VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2525-
IsUniform(IsUniform), IsPredicated(Mask) {
2536+
VPIRMetadata(Metadata), IsUniform(IsUniform), IsPredicated(Mask) {
25262537
if (Mask)
25272538
addOperand(Mask);
25282539
}
@@ -2532,7 +2543,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
25322543
VPReplicateRecipe *clone() override {
25332544
auto *Copy =
25342545
new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2535-
isPredicated() ? getMask() : nullptr);
2546+
isPredicated() ? getMask() : nullptr, *this);
25362547
Copy->transferFlags(*this);
25372548
return Copy;
25382549
}
@@ -2692,8 +2703,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
26922703

26932704
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
26942705
std::initializer_list<VPValue *> Operands,
2695-
bool Consecutive, bool Reverse, DebugLoc DL)
2696-
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(I), Ingredient(I),
2706+
bool Consecutive, bool Reverse,
2707+
const VPIRMetadata &Metadata, DebugLoc DL)
2708+
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
26972709
Consecutive(Consecutive), Reverse(Reverse) {
26982710
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
26992711
}
@@ -2751,16 +2763,17 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
27512763
/// optional mask.
27522764
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
27532765
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
2754-
bool Consecutive, bool Reverse, DebugLoc DL)
2766+
bool Consecutive, bool Reverse,
2767+
const VPIRMetadata &Metadata, DebugLoc DL)
27552768
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2756-
Reverse, DL),
2769+
Reverse, Metadata, DL),
27572770
VPValue(this, &Load) {
27582771
setMask(Mask);
27592772
}
27602773

27612774
VPWidenLoadRecipe *clone() override {
27622775
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2763-
getMask(), Consecutive, Reverse,
2776+
getMask(), Consecutive, Reverse, *this,
27642777
getDebugLoc());
27652778
}
27662779

@@ -2792,7 +2805,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
27922805
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
27932806
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
27942807
{L.getAddr(), &EVL}, L.isConsecutive(),
2795-
L.isReverse(), L.getDebugLoc()),
2808+
L.isReverse(), L, L.getDebugLoc()),
27962809
VPValue(this, &getIngredient()) {
27972810
setMask(Mask);
27982811
}
@@ -2829,16 +2842,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
28292842
/// to store to and an optional mask.
28302843
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
28312844
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
2832-
VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2845+
VPValue *Mask, bool Consecutive, bool Reverse,
2846+
const VPIRMetadata &Metadata, DebugLoc DL)
28332847
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2834-
Consecutive, Reverse, DL) {
2848+
Consecutive, Reverse, Metadata, DL) {
28352849
setMask(Mask);
28362850
}
28372851

28382852
VPWidenStoreRecipe *clone() override {
28392853
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
28402854
getStoredValue(), getMask(), Consecutive,
2841-
Reverse, getDebugLoc());
2855+
Reverse, *this, getDebugLoc());
28422856
}
28432857

28442858
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -2872,7 +2886,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
28722886
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
28732887
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
28742888
{S.getAddr(), S.getStoredValue(), &EVL},
2875-
S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
2889+
S.isConsecutive(), S.isReverse(), S,
2890+
S.getDebugLoc()) {
28762891
setMask(Mask);
28772892
}
28782893

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ class VPBasicBlock;
3939
class VPRegionBlock;
4040
class VPlan;
4141
class Value;
42-
class LoopVersioning;
4342

4443
/// Returns a calculation for the total number of elements for a given \p VF.
4544
/// For fixed width vectors this value is a constant, whereas for scalable
@@ -284,13 +283,6 @@ struct VPTransformState {
284283
Iter->second[CacheIdx] = V;
285284
}
286285

287-
/// Add additional metadata to \p To that was not present on \p Orig.
288-
///
289-
/// Currently this is used to add the noalias annotations based on the
290-
/// inserted memchecks. Use this for instructions that are *cloned* into the
291-
/// vector loop.
292-
void addNewMetadata(Instruction *To, const Instruction *Orig);
293-
294286
/// Set the debug location in the builder using the debug location \p DL.
295287
void setDebugLocFrom(DebugLoc DL);
296288

@@ -339,13 +331,6 @@ struct VPTransformState {
339331
/// The parent loop object for the current scope, or nullptr.
340332
Loop *CurrentParentLoop = nullptr;
341333

342-
/// LoopVersioning. It's only set up (non-null) if memchecks were
343-
/// used.
344-
///
345-
/// This is currently only used to add no-alias metadata based on the
346-
/// memchecks. The actually versioning is performed manually.
347-
LoopVersioning *LVer = nullptr;
348-
349334
/// VPlan-based type analysis.
350335
VPTypeAnalysis TypeAnalysis;
351336

0 commit comments

Comments
 (0)