Skip to content

[ctx_prof] Fix the pre-thinlink "use" case #102511

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class Type;
class PGOCtxProfLoweringPass : public PassInfoMixin<PGOCtxProfLoweringPass> {
public:
explicit PGOCtxProfLoweringPass() = default;
static bool isContextualIRPGOEnabled();
// True if contextual instrumentation is enabled.
static bool isCtxIRPGOInstrEnabled();

PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1173,13 +1173,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
// We don't want to mix pgo ctx gen and pgo gen; we also don't currently
// enable ctx profiling from the frontend.
assert(
!(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) &&
"Enabling both instrumented FDO and contextual instrumentation is not "
"supported.");
assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
"Enabling both instrumented FDO and contextual instrumentation is not "
"supported.");
// Enable contextual profiling instrumentation.
const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
Phase == ThinOrFullLTOPhase::ThinLTOPreLink;

Expand Down Expand Up @@ -1670,8 +1669,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
// In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
// thinlto use the contextual info to perform imports; then use the contextual
// profile in the post-thinlink phase.
if (!UseCtxProfile.empty() && !PGOOpt)
if (!UseCtxProfile.empty() && !PGOOpt) {
addRequiredLTOPreLinkPasses(MPM);
return MPM;
}

// Run partial inlining pass to partially inline functions that have
// large bodies.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ static cl::list<std::string> ContextRoots(
"root of an interesting graph, which will be profiled independently "
"from other similar graphs."));

bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
bool PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() {
return !ContextRoots.empty();
}

Expand Down
17 changes: 10 additions & 7 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
" greater than this threshold."));

extern cl::opt<unsigned> MaxNumVTableAnnotations;
extern cl::opt<std::string> UseCtxProfile;

namespace llvm {
// Command line option to turn on CFG dot dump after profile annotation.
Expand All @@ -338,18 +339,20 @@ extern cl::opt<bool> EnableVTableProfileUse;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm

bool shouldInstrumentForCtxProf() {
return PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() ||
!UseCtxProfile.empty();
}
bool shouldInstrumentEntryBB() {
return PGOInstrumentEntry ||
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
return PGOInstrumentEntry || shouldInstrumentForCtxProf();
}

// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
// profiling implicitly captures indirect call cases, but not other values.
// Supporting other values is relatively straight-forward - just another counter
// range within the context.
bool isValueProfilingDisabled() {
return DisableValueProfiling ||
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
return DisableValueProfiling || shouldInstrumentForCtxProf();
}

// Return a string describing the branch condition that can be
Expand Down Expand Up @@ -902,7 +905,7 @@ static void instrumentOneFunc(
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();

if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
if (shouldInstrumentForCtxProf()) {
auto *CSIntrinsic =
Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
// We want to count the instrumentable callsites, then instrument them. This
Expand Down Expand Up @@ -1861,7 +1864,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
if (!IsCS && !shouldInstrumentForCtxProf())
createIRLevelProfileFlagVar(M, /*IsCS=*/false);

Triple TT(M.getTargetTriple());
Expand Down Expand Up @@ -2112,7 +2115,7 @@ static bool annotateAllFunctions(
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
if (PGOInstrumentEntry.getNumOccurrences() > 0)
InstrumentFuncEntry = PGOInstrumentEntry;
InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
InstrumentFuncEntry |= shouldInstrumentForCtxProf();

bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
for (auto &F : M) {
Expand Down
14 changes: 10 additions & 4 deletions llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; There is no profile, but that's OK because the prelink does not care about
; the content of the profile, just that we intend to use one.
; There is no scenario currently of doing ctx profile use without thinlto.
Expand All @@ -7,19 +7,22 @@

declare void @bar()

;.
; CHECK: @__profn_foo = private constant [3 x i8] c"foo"
;.
define void @foo(i32 %a, ptr %fct) {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did this get hoisted out from the no: block? The only functional change I could find in this patch was addLTOPrelinkPasses(MPM) which adds CanonicalizeAliasesPass and NameAnonGlobalPass.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's the changes in PGOInstrumentation that do that.

; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
; CHECK: [[YES]]:
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
; CHECK-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
; CHECK-NEXT: call void [[FCT]](i32 0)
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[NO]]:
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
Expand All @@ -36,3 +39,6 @@ no:
exit:
ret void
}
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
;.
Loading