Skip to content

Commit 66ce4f7

Browse files
authored
[VPlan] Port invalid cost remarks to VPlan. (llvm#99322)
This patch moves the logic to create remarks for instructions with invalid costs to work on recipes and decouples it from selectVectorizationFactor. This is needed to replace the remaining uses of selectVectorizationFactor with getBestPlan using the VPlan-based cost model. The current implementation iterates over all VPlans and their recipes again to find recipes with invalid costs, which is more work but will only be done when remarks for LV are enabled. Once the remaining uses of selectVectorizationFactor are retired, we can collect VPlans with invalid costs as part of getBestPlan if we want to optimize the remarks case a bit, at the cost of adding additional complexity. PR: llvm#99322
1 parent f2d2ae3 commit 66ce4f7

File tree

4 files changed

+110
-75
lines changed

4 files changed

+110
-75
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,9 @@ class LoopVectorizationPlanner {
411411
VectorizationFactor
412412
selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);
413413

414+
/// Emit remarks for recipes with invalid costs in the available VPlans.
415+
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);
416+
414417
protected:
415418
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
416419
/// according to the information gathered by Legal when it checked if it is

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 95 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
#include "llvm/ADT/Statistic.h"
7676
#include "llvm/ADT/StringRef.h"
7777
#include "llvm/ADT/Twine.h"
78+
#include "llvm/ADT/TypeSwitch.h"
7879
#include "llvm/ADT/iterator_range.h"
7980
#include "llvm/Analysis/AssumptionCache.h"
8081
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -889,20 +890,18 @@ static void debugVectorizationMessage(const StringRef Prefix,
889890
/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
890891
/// RemarkName is the identifier for the remark. If \p I is passed it is an
891892
/// instruction that prevents vectorization. Otherwise \p TheLoop is used for
892-
/// the location of the remark. \return the remark object that can be
893-
/// streamed to.
894-
static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
895-
StringRef RemarkName, Loop *TheLoop, Instruction *I) {
896-
Value *CodeRegion = TheLoop->getHeader();
897-
DebugLoc DL = TheLoop->getStartLoc();
898-
899-
if (I) {
900-
CodeRegion = I->getParent();
901-
// If there is no debug location attached to the instruction, revert back to
902-
// using the loop's.
903-
if (I->getDebugLoc())
904-
DL = I->getDebugLoc();
905-
}
893+
/// the location of the remark. If \p DL is passed, use it as debug location for
894+
/// the remark. \return the remark object that can be streamed to.
895+
static OptimizationRemarkAnalysis
896+
createLVAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop,
897+
Instruction *I, DebugLoc DL = {}) {
898+
Value *CodeRegion = I ? I->getParent() : TheLoop->getHeader();
899+
// If debug location is attached to the instruction, use it. Otherwise if DL
900+
// was not provided, use the loop's.
901+
if (I && I->getDebugLoc())
902+
DL = I->getDebugLoc();
903+
else if (!DL)
904+
DL = TheLoop->getStartLoc();
906905

907906
return OptimizationRemarkAnalysis(PassName, RemarkName, DL, CodeRegion);
908907
}
@@ -943,15 +942,17 @@ void reportVectorizationFailure(const StringRef DebugMsg,
943942

944943
/// Reports an informative message: print \p Msg for debugging purposes as well
945944
/// as an optimization remark. Uses either \p I as location of the remark, or
946-
/// otherwise \p TheLoop.
945+
/// otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
946+
/// remark.
947947
static void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
948-
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
949-
Instruction *I = nullptr) {
948+
OptimizationRemarkEmitter *ORE,
949+
Loop *TheLoop, Instruction *I = nullptr,
950+
DebugLoc DL = {}) {
950951
LLVM_DEBUG(debugVectorizationMessage("", Msg, I));
951952
LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE);
952-
ORE->emit(
953-
createLVAnalysis(Hints.vectorizeAnalysisPassName(), ORETag, TheLoop, I)
954-
<< Msg);
953+
ORE->emit(createLVAnalysis(Hints.vectorizeAnalysisPassName(), ORETag, TheLoop,
954+
I, DL)
955+
<< Msg);
955956
}
956957

957958
/// Report successful vectorization of the loop. In case an outer loop is
@@ -1538,12 +1539,8 @@ class LoopVectorizationCostModel {
15381539
/// Returns the expected execution cost. The unit of the cost does
15391540
/// not matter because we use the 'cost' units to compare different
15401541
/// vector widths. The cost that is returned is *not* normalized by
1541-
/// the factor width. If \p Invalid is not nullptr, this function
1542-
/// will add a pair(Instruction*, ElementCount) to \p Invalid for
1543-
/// each instruction that has an Invalid cost for the given VF.
1544-
InstructionCost
1545-
expectedCost(ElementCount VF,
1546-
SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
1542+
/// the factor width.
1543+
InstructionCost expectedCost(ElementCount VF);
15471544

15481545
bool hasPredStores() const { return NumPredStores > 0; }
15491546

@@ -4350,24 +4347,38 @@ bool LoopVectorizationPlanner::isMoreProfitable(
43504347
return CmpFn(RTCostA, RTCostB);
43514348
}
43524349

4353-
static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
4354-
OptimizationRemarkEmitter *ORE,
4355-
Loop *TheLoop) {
4350+
void LoopVectorizationPlanner::emitInvalidCostRemarks(
4351+
OptimizationRemarkEmitter *ORE) {
4352+
using RecipeVFPair = std::pair<VPRecipeBase *, ElementCount>;
4353+
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
4354+
SmallVector<RecipeVFPair> InvalidCosts;
4355+
for (const auto &Plan : VPlans) {
4356+
for (ElementCount VF : Plan->vectorFactors()) {
4357+
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx,
4358+
CM);
4359+
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
4360+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
4361+
for (auto &R : *VPBB) {
4362+
if (!R.cost(VF, CostCtx).isValid())
4363+
InvalidCosts.emplace_back(&R, VF);
4364+
}
4365+
}
4366+
}
4367+
}
43564368
if (InvalidCosts.empty())
43574369
return;
43584370

43594371
// Emit a report of VFs with invalid costs in the loop.
43604372

4361-
// Group the remarks per instruction, keeping the instruction order from
4362-
// InvalidCosts.
4363-
std::map<Instruction *, unsigned> Numbering;
4373+
// Group the remarks per recipe, keeping the recipe order from InvalidCosts.
4374+
DenseMap<VPRecipeBase *, unsigned> Numbering;
43644375
unsigned I = 0;
43654376
for (auto &Pair : InvalidCosts)
43664377
if (!Numbering.count(Pair.first))
43674378
Numbering[Pair.first] = I++;
43684379

4369-
// Sort the list, first on instruction(number) then on VF.
4370-
sort(InvalidCosts, [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
4380+
// Sort the list, first on recipe(number) then on VF.
4381+
sort(InvalidCosts, [&Numbering](RecipeVFPair &A, RecipeVFPair &B) {
43714382
if (Numbering[A.first] != Numbering[B.first])
43724383
return Numbering[A.first] < Numbering[B.first];
43734384
const auto &LHS = A.second;
@@ -4376,38 +4387,64 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
43764387
std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
43774388
});
43784389

4379-
// For a list of ordered instruction-vf pairs:
4380-
// [(load, vf1), (load, vf2), (store, vf1)]
4381-
// Group the instructions together to emit separate remarks for:
4382-
// load (vf1, vf2)
4383-
// store (vf1)
4384-
auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
4385-
auto Subset = ArrayRef<InstructionVFPair>();
4390+
// For a list of ordered recipe-VF pairs:
4391+
// [(load, VF1), (load, VF2), (store, VF1)]
4392+
// group the recipes together to emit separate remarks for:
4393+
// load (VF1, VF2)
4394+
// store (VF1)
4395+
auto Tail = ArrayRef<RecipeVFPair>(InvalidCosts);
4396+
auto Subset = ArrayRef<RecipeVFPair>();
43864397
do {
43874398
if (Subset.empty())
43884399
Subset = Tail.take_front(1);
43894400

4390-
Instruction *I = Subset.front().first;
4391-
4392-
// If the next instruction is different, or if there are no other pairs,
4401+
VPRecipeBase *R = Subset.front().first;
4402+
4403+
unsigned Opcode =
4404+
TypeSwitch<const VPRecipeBase *, unsigned>(R)
4405+
.Case<VPHeaderPHIRecipe>(
4406+
[](const auto *R) { return Instruction::PHI; })
4407+
.Case<VPWidenSelectRecipe>(
4408+
[](const auto *R) { return Instruction::Select; })
4409+
.Case<VPWidenStoreRecipe>(
4410+
[](const auto *R) { return Instruction::Store; })
4411+
.Case<VPWidenLoadRecipe>(
4412+
[](const auto *R) { return Instruction::Load; })
4413+
.Case<VPWidenCallRecipe>(
4414+
[](const auto *R) { return Instruction::Call; })
4415+
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
4416+
VPWidenCastRecipe>(
4417+
[](const auto *R) { return R->getOpcode(); })
4418+
.Case<VPInterleaveRecipe>([](const VPInterleaveRecipe *R) {
4419+
return R->getStoredValues().empty() ? Instruction::Load
4420+
: Instruction::Store;
4421+
});
4422+
4423+
// If the next recipe is different, or if there are no other pairs,
43934424
// emit a remark for the collated subset. e.g.
4394-
// [(load, vf1), (load, vf2))]
4425+
// [(load, VF1), (load, VF2)]
43954426
// to emit:
4396-
// remark: invalid costs for 'load' at VF=(vf, vf2)
4397-
if (Subset == Tail || Tail[Subset.size()].first != I) {
4427+
// remark: invalid costs for 'load' at VF=(VF1, VF2)
4428+
if (Subset == Tail || Tail[Subset.size()].first != R) {
43984429
std::string OutString;
43994430
raw_string_ostream OS(OutString);
44004431
assert(!Subset.empty() && "Unexpected empty range");
4401-
OS << "Instruction with invalid costs prevented vectorization at VF=(";
4432+
OS << "Recipe with invalid costs prevented vectorization at VF=(";
44024433
for (const auto &Pair : Subset)
44034434
OS << (Pair.second == Subset.front().second ? "" : ", ") << Pair.second;
44044435
OS << "):";
4405-
if (auto *CI = dyn_cast<CallInst>(I))
4406-
OS << " call to " << CI->getCalledFunction()->getName();
4407-
else
4408-
OS << " " << I->getOpcodeName();
4436+
if (Opcode == Instruction::Call) {
4437+
auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
4438+
Function *CalledFn =
4439+
WidenCall ? WidenCall->getCalledScalarFunction()
4440+
: cast<Function>(R->getOperand(R->getNumOperands() - 1)
4441+
->getLiveInIRValue());
4442+
OS << " call to " << CalledFn->getName();
4443+
} else
4444+
OS << " " << Instruction::getOpcodeName(Opcode);
44094445
OS.flush();
4410-
reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
4446+
reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, nullptr,
4447+
R->getDebugLoc());
44114448
Tail = Tail.drop_front(Subset.size());
44124449
Subset = {};
44134450
} else
@@ -4536,14 +4573,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
45364573
ChosenFactor.Cost = InstructionCost::getMax();
45374574
}
45384575

4539-
SmallVector<InstructionVFPair> InvalidCosts;
45404576
for (auto &P : VPlans) {
45414577
for (ElementCount VF : P->vectorFactors()) {
45424578
// The cost for scalar VF=1 is already calculated, so ignore it.
45434579
if (VF.isScalar())
45444580
continue;
45454581

4546-
InstructionCost C = CM.expectedCost(VF, &InvalidCosts);
4582+
InstructionCost C = CM.expectedCost(VF);
45474583
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost);
45484584

45494585
#ifndef NDEBUG
@@ -4578,8 +4614,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
45784614
}
45794615
}
45804616

4581-
emitInvalidCostRemarks(InvalidCosts, ORE, OrigLoop);
4582-
45834617
if (!EnableCondStoresVectorization && CM.hasPredStores()) {
45844618
reportVectorizationFailure(
45854619
"There are conditional stores.",
@@ -5484,8 +5518,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
54845518
return Discount;
54855519
}
54865520

5487-
InstructionCost LoopVectorizationCostModel::expectedCost(
5488-
ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
5521+
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
54895522
InstructionCost Cost;
54905523

54915524
// For each block.
@@ -5505,10 +5538,6 @@ InstructionCost LoopVectorizationCostModel::expectedCost(
55055538
if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0)
55065539
C = InstructionCost(ForceTargetInstructionCost);
55075540

5508-
// Keep a list of instructions with invalid costs.
5509-
if (Invalid && !C.isValid())
5510-
Invalid->emplace_back(&I, VF);
5511-
55125541
BlockCost += C;
55135542
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF "
55145543
<< VF << " For instruction: " << I << '\n');
@@ -9867,6 +9896,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98679896
// Plan how to best vectorize, return the best VF and its cost.
98689897
std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
98699898

9899+
if (ORE->allowExtraAnalysis(LV_NAME))
9900+
LVP.emitInvalidCostRemarks(ORE);
9901+
98709902
VectorizationFactor VF = VectorizationFactor::Disabled();
98719903
unsigned IC = 1;
98729904

llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
; RUN: FileCheck %s --check-prefix=CHECK-REMARKS < %t
33

44
; CHECK-REMARKS: UserVF ignored because of invalid costs.
5-
; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): alloca
6-
; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
5+
; CHECK-REMARKS: Recipe with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): alloca
6+
; CHECK-REMARKS: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): store
77
define void @alloca(ptr %vla, i64 %N) {
88
; CHECK-LABEL: @alloca(
99
; CHECK-NOT: <vscale x

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ for.end:
101101
}
102102

103103
; CHECK-REMARKS: UserVF ignored because of invalid costs.
104-
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
105-
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
106-
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
104+
; CHECK-REMARKS-NEXT: t.c:3:10: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): load
105+
; CHECK-REMARKS-NEXT: t.c:3:20: Recipe with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
106+
; CHECK-REMARKS-NEXT: t.c:3:30: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): store
107107
define void @vec_sin_no_mapping(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) {
108108
; CHECK: @vec_sin_no_mapping
109109
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -127,10 +127,10 @@ for.cond.cleanup: ; preds = %for.body
127127
}
128128

129129
; CHECK-REMARKS: UserVF ignored because of invalid costs.
130-
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
131-
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
132-
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
133-
; CHECK-REMARKS-NEXT: t.c:3:40: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
130+
; CHECK-REMARKS-NEXT: t.c:3:10: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): load
131+
; CHECK-REMARKS-NEXT: t.c:3:30: Recipe with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
132+
; CHECK-REMARKS-NEXT: t.c:3:20: Recipe with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
133+
; CHECK-REMARKS-NEXT: t.c:3:40: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): store
134134
define void @vec_sin_no_mapping_ite(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) {
135135
; CHECK: @vec_sin_no_mapping_ite
136136
; CHECK-NOT: <vscale x
@@ -163,9 +163,9 @@ for.cond.cleanup: ; preds = %for.body
163163
}
164164

165165
; CHECK-REMARKS: UserVF ignored because of invalid costs.
166-
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
167-
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
168-
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
166+
; CHECK-REMARKS-NEXT: t.c:3:10: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): load
167+
; CHECK-REMARKS-NEXT: t.c:3:20: Recipe with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
168+
; CHECK-REMARKS-NEXT: t.c:3:30: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): store
169169
define void @vec_sin_fixed_mapping(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) {
170170
; CHECK: @vec_sin_fixed_mapping
171171
; CHECK: call fast <2 x float> @llvm.sin.v2f32

0 commit comments

Comments
 (0)