Skip to content

Commit 1a6d60e

Browse files
authored
[ctx_prof] Fix the pre-thinlink "use" case (#102511)
Didn't notice in #101338 that the instrumentation in `llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll` was actually incorrect.
1 parent 373d35d commit 1a6d60e

File tree

5 files changed

+30
-19
lines changed

5 files changed

+30
-19
lines changed

llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ class Type;
1919
class PGOCtxProfLoweringPass : public PassInfoMixin<PGOCtxProfLoweringPass> {
2020
public:
2121
explicit PGOCtxProfLoweringPass() = default;
22-
static bool isContextualIRPGOEnabled();
22+
// True if contextual instrumentation is enabled.
23+
static bool isCtxIRPGOInstrEnabled();
2324

2425
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
2526
};

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,13 +1173,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
11731173
const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
11741174
// We don't want to mix pgo ctx gen and pgo gen; we also don't currently
11751175
// enable ctx profiling from the frontend.
1176-
assert(
1177-
!(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) &&
1178-
"Enabling both instrumented FDO and contextual instrumentation is not "
1179-
"supported.");
1176+
assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
1177+
"Enabling both instrumented PGO and contextual instrumentation is not "
1178+
"supported.");
11801179
// Enable contextual profiling instrumentation.
11811180
const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1182-
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
1181+
PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
11831182
const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
11841183
Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
11851184

@@ -1670,8 +1669,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
16701669
// In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
16711670
// thinlto use the contextual info to perform imports; then use the contextual
16721671
// profile in the post-thinlink phase.
1673-
if (!UseCtxProfile.empty() && !PGOOpt)
1672+
if (!UseCtxProfile.empty() && !PGOOpt) {
1673+
addRequiredLTOPreLinkPasses(MPM);
16741674
return MPM;
1675+
}
16751676

16761677
// Run partial inlining pass to partially inline functions that have
16771678
// large bodies.

llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ static cl::list<std::string> ContextRoots(
3030
"root of an interesting graph, which will be profiled independently "
3131
"from other similar graphs."));
3232

33-
bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
33+
bool PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() {
3434
return !ContextRoots.empty();
3535
}
3636

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
321321
" greater than this threshold."));
322322

323323
extern cl::opt<unsigned> MaxNumVTableAnnotations;
324+
extern cl::opt<std::string> UseCtxProfile;
324325

325326
namespace llvm {
326327
// Command line option to turn on CFG dot dump after profile annotation.
@@ -338,18 +339,20 @@ extern cl::opt<bool> EnableVTableProfileUse;
338339
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
339340
} // namespace llvm
340341

342+
bool shouldInstrumentForCtxProf() {
343+
return PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() ||
344+
!UseCtxProfile.empty();
345+
}
341346
bool shouldInstrumentEntryBB() {
342-
return PGOInstrumentEntry ||
343-
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
347+
return PGOInstrumentEntry || shouldInstrumentForCtxProf();
344348
}
345349

346350
// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
347351
// profiling implicitly captures indirect call cases, but not other values.
348352
// Supporting other values is relatively straight-forward - just another counter
349353
// range within the context.
350354
bool isValueProfilingDisabled() {
351-
return DisableValueProfiling ||
352-
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
355+
return DisableValueProfiling || shouldInstrumentForCtxProf();
353356
}
354357

355358
// Return a string describing the branch condition that can be
@@ -902,7 +905,7 @@ static void instrumentOneFunc(
902905
unsigned NumCounters =
903906
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
904907

905-
if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
908+
if (shouldInstrumentForCtxProf()) {
906909
auto *CSIntrinsic =
907910
Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
908911
// We want to count the instrumentable callsites, then instrument them. This
@@ -1861,7 +1864,7 @@ static bool InstrumentAllFunctions(
18611864
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
18621865
// For the context-sensitve instrumentation, we should have a separated pass
18631866
// (before LTO/ThinLTO linking) to create these variables.
1864-
if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
1867+
if (!IsCS && !shouldInstrumentForCtxProf())
18651868
createIRLevelProfileFlagVar(M, /*IsCS=*/false);
18661869

18671870
Triple TT(M.getTargetTriple());
@@ -2112,7 +2115,7 @@ static bool annotateAllFunctions(
21122115
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
21132116
if (PGOInstrumentEntry.getNumOccurrences() > 0)
21142117
InstrumentFuncEntry = PGOInstrumentEntry;
2115-
InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
2118+
InstrumentFuncEntry |= shouldInstrumentForCtxProf();
21162119

21172120
bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
21182121
for (auto &F : M) {

llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
22
; There is no profile, but that's OK because the prelink does not care about
33
; the content of the profile, just that we intend to use one.
44
; There is no scenario currently of doing ctx profile use without thinlto.
@@ -7,19 +7,22 @@
77

88
declare void @bar()
99

10+
;.
11+
; CHECK: @__profn_foo = private constant [3 x i8] c"foo"
12+
;.
1013
define void @foo(i32 %a, ptr %fct) {
1114
; CHECK-LABEL: define void @foo(
1215
; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
16+
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
1317
; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
1418
; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
1519
; CHECK: [[YES]]:
1620
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
17-
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
18-
; CHECK-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
21+
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
1922
; CHECK-NEXT: call void [[FCT]](i32 0)
2023
; CHECK-NEXT: br label %[[EXIT:.*]]
2124
; CHECK: [[NO]]:
22-
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
25+
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
2326
; CHECK-NEXT: call void @bar()
2427
; CHECK-NEXT: br label %[[EXIT]]
2528
; CHECK: [[EXIT]]:
@@ -36,3 +39,6 @@ no:
3639
exit:
3740
ret void
3841
}
42+
;.
43+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
44+
;.

0 commit comments

Comments
 (0)