75
75
#include " llvm/ADT/Statistic.h"
76
76
#include " llvm/ADT/StringRef.h"
77
77
#include " llvm/ADT/Twine.h"
78
+ #include " llvm/ADT/TypeSwitch.h"
78
79
#include " llvm/ADT/iterator_range.h"
79
80
#include " llvm/Analysis/AssumptionCache.h"
80
81
#include " llvm/Analysis/BasicAliasAnalysis.h"
@@ -889,20 +890,18 @@ static void debugVectorizationMessage(const StringRef Prefix,
889
890
// / \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
890
891
// / RemarkName is the identifier for the remark. If \p I is passed it is an
891
892
// / instruction that prevents vectorization. Otherwise \p TheLoop is used for
892
- // / the location of the remark. \return the remark object that can be
893
- // / streamed to.
894
- static OptimizationRemarkAnalysis createLVAnalysis (const char *PassName,
895
- StringRef RemarkName, Loop *TheLoop, Instruction *I) {
896
- Value *CodeRegion = TheLoop->getHeader ();
897
- DebugLoc DL = TheLoop->getStartLoc ();
898
-
899
- if (I) {
900
- CodeRegion = I->getParent ();
901
- // If there is no debug location attached to the instruction, revert back to
902
- // using the loop's.
903
- if (I->getDebugLoc ())
904
- DL = I->getDebugLoc ();
905
- }
893
+ // / the location of the remark. If \p DL is passed, use it as debug location for
894
+ // / the remark. \return the remark object that can be streamed to.
895
+ static OptimizationRemarkAnalysis
896
+ createLVAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop,
897
+ Instruction *I, DebugLoc DL = {}) {
898
// The remark's code region is I's parent block when I is given, otherwise the
// loop header.
+ Value *CodeRegion = I ? I->getParent () : TheLoop->getHeader ();
899
+ // If debug location is attached to the instruction, use it. Otherwise if DL
900
+ // was not provided, use the loop's.
901
+ if (I && I->getDebugLoc ())
902
+ DL = I->getDebugLoc ();
903
+ else if (!DL)
904
+ DL = TheLoop->getStartLoc ();
906
905
907
906
// A caller-supplied DL survives to this point only when I is null or carries
// no debug location of its own.
return OptimizationRemarkAnalysis (PassName, RemarkName, DL, CodeRegion);
908
907
}
@@ -943,15 +942,17 @@ void reportVectorizationFailure(const StringRef DebugMsg,
943
942
944
943
// / Reports an informative message: print \p Msg for debugging purposes as well
945
944
// / as an optimization remark. Uses either \p I as location of the remark, or
946
- // / otherwise \p TheLoop.
945
+ // / otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
946
+ // / remark.
947
947
static void reportVectorizationInfo (const StringRef Msg, const StringRef ORETag,
948
- OptimizationRemarkEmitter *ORE, Loop *TheLoop,
949
- Instruction *I = nullptr ) {
948
+ OptimizationRemarkEmitter *ORE,
949
+ Loop *TheLoop, Instruction *I = nullptr ,
950
+ DebugLoc DL = {}) {
950
951
// Debug-print Msg (together with I) through debugVectorizationMessage.
LLVM_DEBUG (debugVectorizationMessage (" " , Msg, I));
951
952
// Hints is consulted below only for the analysis pass name used for the remark.
LoopVectorizeHints Hints (TheLoop, true /* doesn't matter */ , *ORE);
952
- ORE->emit (
953
- createLVAnalysis (Hints. vectorizeAnalysisPassName (), ORETag, TheLoop, I )
954
- << Msg);
953
// Emit Msg as a remark built by createLVAnalysis; the new version also forwards
// DL to it.
+ ORE->emit (createLVAnalysis (Hints. vectorizeAnalysisPassName (), ORETag, TheLoop,
954
+ I, DL )
955
+ << Msg);
955
956
}
956
957
957
958
// / Report successful vectorization of the loop. In case an outer loop is
@@ -1538,12 +1539,8 @@ class LoopVectorizationCostModel {
1538
1539
// / Returns the expected execution cost. The unit of the cost does
1539
1540
// / not matter because we use the 'cost' units to compare different
1540
1541
// / vector widths. The cost that is returned is *not* normalized by
1541
- // / the factor width. If \p Invalid is not nullptr, this function
1542
- // / will add a pair(Instruction*, ElementCount) to \p Invalid for
1543
- // / each instruction that has an Invalid cost for the given VF.
1544
- InstructionCost
1545
- expectedCost (ElementCount VF,
1546
- SmallVectorImpl<InstructionVFPair> *Invalid = nullptr );
1542
+ // / the factor width.
1543
+ InstructionCost expectedCost (ElementCount VF);
1547
1544
1548
1545
bool hasPredStores () const { return NumPredStores > 0 ; }
1549
1546
@@ -4350,24 +4347,38 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4350
4347
return CmpFn (RTCostA, RTCostB);
4351
4348
}
4352
4349
4353
- static void emitInvalidCostRemarks (SmallVector<InstructionVFPair> InvalidCosts,
4354
- OptimizationRemarkEmitter *ORE,
4355
- Loop *TheLoop) {
4350
+ void LoopVectorizationPlanner::emitInvalidCostRemarks (
4351
+ OptimizationRemarkEmitter *ORE) {
4352
+ using RecipeVFPair = std::pair<VPRecipeBase *, ElementCount>;
4353
+ LLVMContext &LLVMCtx = OrigLoop->getHeader ()->getContext ();
4354
+ SmallVector<RecipeVFPair> InvalidCosts;
4355
+ for (const auto &Plan : VPlans) {
4356
+ for (ElementCount VF : Plan->vectorFactors ()) {
4357
+ VPCostContext CostCtx (CM.TTI , Legal->getWidestInductionType (), LLVMCtx,
4358
+ CM);
4359
+ auto Iter = vp_depth_first_deep (Plan->getVectorLoopRegion ()->getEntry ());
4360
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
4361
+ for (auto &R : *VPBB) {
4362
+ if (!R.cost (VF, CostCtx).isValid ())
4363
+ InvalidCosts.emplace_back (&R, VF);
4364
+ }
4365
+ }
4366
+ }
4367
+ }
4356
4368
if (InvalidCosts.empty ())
4357
4369
return ;
4358
4370
4359
4371
// Emit a report of VFs with invalid costs in the loop.
4360
4372
4361
- // Group the remarks per instruction, keeping the instruction order from
4362
- // InvalidCosts.
4363
- std::map<Instruction *, unsigned > Numbering;
4373
+ // Group the remarks per recipe, keeping the recipe order from InvalidCosts.
4374
+ DenseMap<VPRecipeBase *, unsigned > Numbering;
4364
4375
unsigned I = 0 ;
4365
4376
for (auto &Pair : InvalidCosts)
4366
4377
if (!Numbering.count (Pair.first ))
4367
4378
Numbering[Pair.first ] = I++;
4368
4379
4369
- // Sort the list, first on instruction (number) then on VF.
4370
- sort (InvalidCosts, [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
4380
+ // Sort the list, first on recipe (number) then on VF.
4381
+ sort (InvalidCosts, [&Numbering](RecipeVFPair &A, RecipeVFPair &B) {
4371
4382
if (Numbering[A.first ] != Numbering[B.first ])
4372
4383
return Numbering[A.first ] < Numbering[B.first ];
4373
4384
const auto &LHS = A.second ;
@@ -4376,38 +4387,64 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
4376
4387
std::make_tuple (RHS.isScalable (), RHS.getKnownMinValue ());
4377
4388
});
4378
4389
4379
- // For a list of ordered instruction-vf pairs:
4380
- // [(load, vf1 ), (load, vf2 ), (store, vf1 )]
4381
- // Group the instructions together to emit separate remarks for:
4382
- // load (vf1, vf2 )
4383
- // store (vf1 )
4384
- auto Tail = ArrayRef<InstructionVFPair >(InvalidCosts);
4385
- auto Subset = ArrayRef<InstructionVFPair >();
4390
+ // For a list of ordered recipe-VF pairs:
4391
+ // [(load, VF1 ), (load, VF2 ), (store, VF1 )]
4392
+ // group the recipes together to emit separate remarks for:
4393
+ // load (VF1, VF2 )
4394
+ // store (VF1 )
4395
+ auto Tail = ArrayRef<RecipeVFPair >(InvalidCosts);
4396
+ auto Subset = ArrayRef<RecipeVFPair >();
4386
4397
do {
4387
4398
if (Subset.empty ())
4388
4399
Subset = Tail.take_front (1 );
4389
4400
4390
- Instruction *I = Subset.front ().first ;
4391
-
4392
- // If the next instruction is different, or if there are no other pairs,
4401
+ VPRecipeBase *R = Subset.front ().first ;
4402
+
4403
+ unsigned Opcode =
4404
+ TypeSwitch<const VPRecipeBase *, unsigned >(R)
4405
+ .Case <VPHeaderPHIRecipe>(
4406
+ [](const auto *R) { return Instruction::PHI; })
4407
+ .Case <VPWidenSelectRecipe>(
4408
+ [](const auto *R) { return Instruction::Select; })
4409
+ .Case <VPWidenStoreRecipe>(
4410
+ [](const auto *R) { return Instruction::Store; })
4411
+ .Case <VPWidenLoadRecipe>(
4412
+ [](const auto *R) { return Instruction::Load; })
4413
+ .Case <VPWidenCallRecipe>(
4414
+ [](const auto *R) { return Instruction::Call; })
4415
+ .Case <VPInstruction, VPWidenRecipe, VPReplicateRecipe,
4416
+ VPWidenCastRecipe>(
4417
+ [](const auto *R) { return R->getOpcode (); })
4418
+ .Case <VPInterleaveRecipe>([](const VPInterleaveRecipe *R) {
4419
+ return R->getStoredValues ().empty () ? Instruction::Load
4420
+ : Instruction::Store;
4421
+ });
4422
+
4423
+ // If the next recipe is different, or if there are no other pairs,
4393
4424
// emit a remark for the collated subset. e.g.
4394
- // [(load, vf1 ), (load, vf2 ))]
4425
+ // [(load, VF1 ), (load, VF2 )]
4395
4426
// to emit:
4396
- // remark: invalid costs for 'load' at VF=(vf, vf2 )
4397
- if (Subset == Tail || Tail[Subset.size ()].first != I ) {
4427
+ // remark: invalid costs for 'load' at VF=(VF1, VF2 )
4428
+ if (Subset == Tail || Tail[Subset.size ()].first != R ) {
4398
4429
std::string OutString;
4399
4430
raw_string_ostream OS (OutString);
4400
4431
assert (!Subset.empty () && " Unexpected empty range" );
4401
- OS << " Instruction with invalid costs prevented vectorization at VF=(" ;
4432
+ OS << " Recipe with invalid costs prevented vectorization at VF=(" ;
4402
4433
for (const auto &Pair : Subset)
4403
4434
OS << (Pair.second == Subset.front ().second ? " " : " , " ) << Pair.second ;
4404
4435
OS << " ):" ;
4405
- if (auto *CI = dyn_cast<CallInst>(I))
4406
- OS << " call to " << CI->getCalledFunction ()->getName ();
4407
- else
4408
- OS << " " << I->getOpcodeName ();
4436
+ if (Opcode == Instruction::Call) {
4437
+ auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
4438
+ Function *CalledFn =
4439
+ WidenCall ? WidenCall->getCalledScalarFunction ()
4440
+ : cast<Function>(R->getOperand (R->getNumOperands () - 1 )
4441
+ ->getLiveInIRValue ());
4442
+ OS << " call to " << CalledFn->getName ();
4443
+ } else
4444
+ OS << " " << Instruction::getOpcodeName (Opcode);
4409
4445
OS.flush ();
4410
- reportVectorizationInfo (OutString, " InvalidCost" , ORE, TheLoop, I);
4446
+ reportVectorizationInfo (OutString, " InvalidCost" , ORE, OrigLoop, nullptr ,
4447
+ R->getDebugLoc ());
4411
4448
Tail = Tail.drop_front (Subset.size ());
4412
4449
Subset = {};
4413
4450
} else
@@ -4536,14 +4573,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4536
4573
ChosenFactor.Cost = InstructionCost::getMax ();
4537
4574
}
4538
4575
4539
- SmallVector<InstructionVFPair> InvalidCosts;
4540
4576
for (auto &P : VPlans) {
4541
4577
for (ElementCount VF : P->vectorFactors ()) {
4542
4578
// The cost for scalar VF=1 is already calculated, so ignore it.
4543
4579
if (VF.isScalar ())
4544
4580
continue ;
4545
4581
4546
- InstructionCost C = CM.expectedCost (VF, &InvalidCosts );
4582
+ InstructionCost C = CM.expectedCost (VF);
4547
4583
VectorizationFactor Candidate (VF, C, ScalarCost.ScalarCost );
4548
4584
4549
4585
#ifndef NDEBUG
@@ -4578,8 +4614,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4578
4614
}
4579
4615
}
4580
4616
4581
- emitInvalidCostRemarks (InvalidCosts, ORE, OrigLoop);
4582
-
4583
4617
if (!EnableCondStoresVectorization && CM.hasPredStores ()) {
4584
4618
reportVectorizationFailure (
4585
4619
" There are conditional stores." ,
@@ -5484,8 +5518,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
5484
5518
return Discount;
5485
5519
}
5486
5520
5487
- InstructionCost LoopVectorizationCostModel::expectedCost (
5488
- ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
5521
+ InstructionCost LoopVectorizationCostModel::expectedCost (ElementCount VF) {
5489
5522
InstructionCost Cost;
5490
5523
5491
5524
// For each block.
@@ -5505,10 +5538,6 @@ InstructionCost LoopVectorizationCostModel::expectedCost(
5505
5538
if (C.isValid () && ForceTargetInstructionCost.getNumOccurrences () > 0 )
5506
5539
C = InstructionCost (ForceTargetInstructionCost);
5507
5540
5508
- // Keep a list of instructions with invalid costs.
5509
- if (Invalid && !C.isValid ())
5510
- Invalid->emplace_back (&I, VF);
5511
-
5512
5541
BlockCost += C;
5513
5542
LLVM_DEBUG (dbgs () << " LV: Found an estimated cost of " << C << " for VF "
5514
5543
<< VF << " For instruction: " << I << ' \n ' );
@@ -9867,6 +9896,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
9867
9896
// Plan how to best vectorize, return the best VF and its cost.
9868
9897
std::optional<VectorizationFactor> MaybeVF = LVP.plan (UserVF, UserIC);
9869
9898
9899
+ if (ORE->allowExtraAnalysis (LV_NAME))
9900
+ LVP.emitInvalidCostRemarks (ORE);
9901
+
9870
9902
VectorizationFactor VF = VectorizationFactor::Disabled ();
9871
9903
unsigned IC = 1 ;
9872
9904
0 commit comments