75
75
#include "llvm/ADT/Statistic.h"
76
76
#include "llvm/ADT/StringRef.h"
77
77
#include "llvm/ADT/Twine.h"
78
+ #include "llvm/ADT/TypeSwitch.h"
78
79
#include "llvm/ADT/iterator_range.h"
79
80
#include "llvm/Analysis/AssumptionCache.h"
80
81
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -889,20 +890,18 @@ static void debugVectorizationMessage(const StringRef Prefix,
889
890
/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
890
891
/// RemarkName is the identifier for the remark. If \p I is passed it is an
891
892
/// instruction that prevents vectorization. Otherwise \p TheLoop is used for
892
- /// the location of the remark. \return the remark object that can be
893
- /// streamed to.
894
- static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
895
- StringRef RemarkName, Loop *TheLoop, Instruction *I) {
896
- Value *CodeRegion = TheLoop->getHeader();
897
- DebugLoc DL = TheLoop->getStartLoc();
898
-
899
- if (I) {
900
- CodeRegion = I->getParent();
901
- // If there is no debug location attached to the instruction, revert back to
902
- // using the loop's.
903
- if (I->getDebugLoc())
904
- DL = I->getDebugLoc();
905
- }
893
+ /// the location of the remark. \p DL, if set, is the debug location used when
894
+ /// \p I provides none. \return the remark object that can be streamed to.
895
+ static OptimizationRemarkAnalysis
896
+ createLVAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop,
897
+ Instruction *I, DebugLoc DL = {}) {
898
+ Value *CodeRegion = I ? I->getParent() : TheLoop->getHeader();
899
+ // If a debug location is attached to the instruction, use it. Otherwise, if
900
+ // DL was not provided, fall back to the loop's start location.
901
+ if (I && I->getDebugLoc())
902
+ DL = I->getDebugLoc();
903
+ else if (!DL)
904
+ DL = TheLoop->getStartLoc();
906
905
907
906
return OptimizationRemarkAnalysis(PassName, RemarkName, DL, CodeRegion);
908
907
}
@@ -943,15 +942,17 @@ void reportVectorizationFailure(const StringRef DebugMsg,
943
942
944
943
/// Reports an informative message: print \p Msg for debugging purposes as well
945
944
/// as an optimization remark. Uses either \p I as location of the remark, or
946
- /// otherwise \p TheLoop.
945
+ /// otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
946
+ /// remark, unless \p I has a debug location of its own.
947
947
static void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
948
- OptimizationRemarkEmitter *ORE, Loop *TheLoop,
949
- Instruction *I = nullptr) {
948
+ OptimizationRemarkEmitter *ORE,
949
+ Loop *TheLoop, Instruction *I = nullptr,
950
+ DebugLoc DL = {}) {
950
951
LLVM_DEBUG(debugVectorizationMessage("", Msg, I));
951
952
LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE);
952
- ORE->emit(
953
- createLVAnalysis(Hints.vectorizeAnalysisPassName(), ORETag, TheLoop, I )
954
- << Msg);
953
+ ORE->emit(createLVAnalysis(Hints.vectorizeAnalysisPassName(), ORETag, TheLoop,
954
+ I, DL )
955
+ << Msg);
955
956
}
956
957
957
958
/// Report successful vectorization of the loop. In case an outer loop is
@@ -1538,12 +1539,8 @@ class LoopVectorizationCostModel {
1538
1539
/// Returns the expected execution cost. The unit of the cost does
1539
1540
/// not matter because we use the 'cost' units to compare different
1540
1541
/// vector widths. The cost that is returned is *not* normalized by
1541
- /// the factor width. If \p Invalid is not nullptr, this function
1542
- /// will add a pair(Instruction*, ElementCount) to \p Invalid for
1543
- /// each instruction that has an Invalid cost for the given VF.
1544
- InstructionCost
1545
- expectedCost(ElementCount VF,
1546
- SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
1542
+ /// the factor width.
1543
+ InstructionCost expectedCost(ElementCount VF);
1547
1544
1548
1545
bool hasPredStores() const { return NumPredStores > 0; }
1549
1546
@@ -4350,24 +4347,38 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4350
4347
return CmpFn(RTCostA, RTCostB);
4351
4348
}
4352
4349
4353
- static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
4354
- OptimizationRemarkEmitter *ORE,
4355
- Loop *TheLoop) {
4350
+ void LoopVectorizationPlanner::emitInvalidCostRemarks(
4351
+ OptimizationRemarkEmitter *ORE) {
4352
+ using RecipeVFPair = std::pair<VPRecipeBase *, ElementCount>;
4353
+ LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
4354
+ SmallVector<RecipeVFPair> InvalidCosts;
4355
+ for (const auto &Plan : VPlans) {
4356
+ for (ElementCount VF : Plan->vectorFactors()) {
4357
+ VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx,
4358
+ CM);
4359
+ auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
4360
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
4361
+ for (auto &R : *VPBB) {
4362
+ if (!R.cost(VF, CostCtx).isValid())
4363
+ InvalidCosts.emplace_back(&R, VF);
4364
+ }
4365
+ }
4366
+ }
4367
+ }
4356
4368
if (InvalidCosts.empty())
4357
4369
return;
4358
4370
4359
4371
// Emit a report of VFs with invalid costs in the loop.
4360
4372
4361
- // Group the remarks per instruction, keeping the instruction order from
4362
- // InvalidCosts.
4363
- std::map<Instruction *, unsigned> Numbering;
4373
+ // Group the remarks per recipe, keeping the recipe order from InvalidCosts.
4374
+ DenseMap<VPRecipeBase *, unsigned> Numbering;
4364
4375
unsigned I = 0;
4365
4376
for (auto &Pair : InvalidCosts)
4366
4377
if (!Numbering.count(Pair.first))
4367
4378
Numbering[Pair.first] = I++;
4368
4379
4369
- // Sort the list, first on instruction (number) then on VF.
4370
- sort(InvalidCosts, [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
4380
+ // Sort the list, first on recipe (number) then on VF.
4381
+ sort(InvalidCosts, [&Numbering](RecipeVFPair &A, RecipeVFPair &B) {
4371
4382
if (Numbering[A.first] != Numbering[B.first])
4372
4383
return Numbering[A.first] < Numbering[B.first];
4373
4384
const auto &LHS = A.second;
@@ -4376,38 +4387,64 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
4376
4387
std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
4377
4388
});
4378
4389
4379
- // For a list of ordered instruction-vf pairs:
4380
- // [(load, vf1 ), (load, vf2 ), (store, vf1 )]
4381
- // Group the instructions together to emit separate remarks for:
4382
- // load (vf1, vf2 )
4383
- // store (vf1 )
4384
- auto Tail = ArrayRef<InstructionVFPair >(InvalidCosts);
4385
- auto Subset = ArrayRef<InstructionVFPair >();
4390
+ // For a list of ordered recipe-VF pairs:
4391
+ // [(load, VF1 ), (load, VF2 ), (store, VF1 )]
4392
+ // group the recipes together to emit separate remarks for:
4393
+ // load (VF1, VF2 )
4394
+ // store (VF1 )
4395
+ auto Tail = ArrayRef<RecipeVFPair >(InvalidCosts);
4396
+ auto Subset = ArrayRef<RecipeVFPair >();
4386
4397
do {
4387
4398
if (Subset.empty())
4388
4399
Subset = Tail.take_front(1);
4389
4400
4390
- Instruction *I = Subset.front().first;
4391
-
4392
- // If the next instruction is different, or if there are no other pairs,
4401
+ VPRecipeBase *R = Subset.front().first;
4402
+
4403
+ unsigned Opcode =
4404
+ TypeSwitch<const VPRecipeBase *, unsigned>(R)
4405
+ .Case<VPHeaderPHIRecipe>(
4406
+ [](const auto *R) { return Instruction::PHI; })
4407
+ .Case<VPWidenSelectRecipe>(
4408
+ [](const auto *R) { return Instruction::Select; })
4409
+ .Case<VPWidenStoreRecipe>(
4410
+ [](const auto *R) { return Instruction::Store; })
4411
+ .Case<VPWidenLoadRecipe>(
4412
+ [](const auto *R) { return Instruction::Load; })
4413
+ .Case<VPWidenCallRecipe>(
4414
+ [](const auto *R) { return Instruction::Call; })
4415
+ .Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
4416
+ VPWidenCastRecipe>(
4417
+ [](const auto *R) { return R->getOpcode(); })
4418
+ .Case<VPInterleaveRecipe>([](const VPInterleaveRecipe *R) {
4419
+ return R->getStoredValues().empty() ? Instruction::Load
4420
+ : Instruction::Store;
4421
+ });
4422
+
4423
+ // If the next recipe is different, or if there are no other pairs,
4393
4424
// emit a remark for the collated subset. e.g.
4394
- // [(load, vf1 ), (load, vf2 ))]
4425
+ // [(load, VF1 ), (load, VF2 )]
4395
4426
// to emit:
4396
- // remark: invalid costs for 'load' at VF=(vf, vf2 )
4397
- if (Subset == Tail || Tail[Subset.size()].first != I ) {
4427
+ // remark: invalid costs for 'load' at VF=(VF1, VF2 )
4428
+ if (Subset == Tail || Tail[Subset.size()].first != R ) {
4398
4429
std::string OutString;
4399
4430
raw_string_ostream OS(OutString);
4400
4431
assert(!Subset.empty() && "Unexpected empty range");
4401
- OS << "Instruction with invalid costs prevented vectorization at VF=(";
4432
+ OS << "Recipe with invalid costs prevented vectorization at VF=(";
4402
4433
for (const auto &Pair : Subset)
4403
4434
OS << (Pair.second == Subset.front().second ? "" : ", ") << Pair.second;
4404
4435
OS << "):";
4405
- if (auto *CI = dyn_cast<CallInst>(I))
4406
- OS << " call to " << CI->getCalledFunction()->getName();
4407
- else
4408
- OS << " " << I->getOpcodeName();
4436
+ if (Opcode == Instruction::Call) {
4437
+ auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
4438
+ Function *CalledFn =
4439
+ WidenCall ? WidenCall->getCalledScalarFunction()
4440
+ : cast<Function>(R->getOperand(R->getNumOperands() - 1)
4441
+ ->getLiveInIRValue());
4442
+ OS << " call to " << CalledFn->getName();
4443
+ } else
4444
+ OS << " " << Instruction::getOpcodeName(Opcode);
4409
4445
OS.flush();
4410
- reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
4446
+ reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, nullptr,
4447
+ R->getDebugLoc());
4411
4448
Tail = Tail.drop_front(Subset.size());
4412
4449
Subset = {};
4413
4450
} else
@@ -4536,14 +4573,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4536
4573
ChosenFactor.Cost = InstructionCost::getMax();
4537
4574
}
4538
4575
4539
- SmallVector<InstructionVFPair> InvalidCosts;
4540
4576
for (auto &P : VPlans) {
4541
4577
for (ElementCount VF : P->vectorFactors()) {
4542
4578
// The cost for scalar VF=1 is already calculated, so ignore it.
4543
4579
if (VF.isScalar())
4544
4580
continue;
4545
4581
4546
- InstructionCost C = CM.expectedCost(VF, &InvalidCosts );
4582
+ InstructionCost C = CM.expectedCost(VF);
4547
4583
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost);
4548
4584
4549
4585
#ifndef NDEBUG
@@ -4578,8 +4614,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4578
4614
}
4579
4615
}
4580
4616
4581
- emitInvalidCostRemarks(InvalidCosts, ORE, OrigLoop);
4582
-
4583
4617
if (!EnableCondStoresVectorization && CM.hasPredStores()) {
4584
4618
reportVectorizationFailure(
4585
4619
"There are conditional stores.",
@@ -5484,8 +5518,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
5484
5518
return Discount;
5485
5519
}
5486
5520
5487
- InstructionCost LoopVectorizationCostModel::expectedCost(
5488
- ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
5521
+ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
5489
5522
InstructionCost Cost;
5490
5523
5491
5524
// For each block.
@@ -5505,10 +5538,6 @@ InstructionCost LoopVectorizationCostModel::expectedCost(
5505
5538
if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0)
5506
5539
C = InstructionCost(ForceTargetInstructionCost);
5507
5540
5508
- // Keep a list of instructions with invalid costs.
5509
- if (Invalid && !C.isValid())
5510
- Invalid->emplace_back(&I, VF);
5511
-
5512
5541
BlockCost += C;
5513
5542
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF "
5514
5543
<< VF << " For instruction: " << I << '\n');
@@ -9867,6 +9896,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
9867
9896
// Plan how to best vectorize, return the best VF and its cost.
9868
9897
std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
9869
9898
9899
+ if (ORE->allowExtraAnalysis(LV_NAME))
9900
+ LVP.emitInvalidCostRemarks(ORE);
9901
+
9870
9902
VectorizationFactor VF = VectorizationFactor::Disabled();
9871
9903
unsigned IC = 1;
9872
9904
0 commit comments