Skip to content

Commit e517cfc

Browse files
authored
[InstrPGO] Support cold function coverage instrumentation (#109837)
This patch adds support for cold function coverage instrumentation based on sampling PGO counts. The major motivation is to detect dead functions in services that are optimized with sampling PGO. If a function is covered by the sampling profile (e.g., it has an entry count > 0), we skip instrumenting it, which significantly reduces the instrumentation overhead.

More details about the implementation and flags:
- Added a flag `--pgo-instrument-cold-function-only` in `PGOInstrumentation.cpp` as the main switch to control skipping the instrumentation.
- Built the extra instrumentation passes (the bundle of passes in `addPGOInstrPasses`) under the sampling PGO pipeline. This is controlled by the `--instrument-cold-function-only-path` flag.
- Added a driver flag `-fprofile-generate-cold-function-coverage`, which:
  1) Configures the flags in one place, i.e., adds `--instrument-cold-function-only-path=<...>` and `--pgo-function-entry-coverage`. Note that the instrumentation file path is passed through `--instrument-cold-function-only-path`, because we cannot use `PGOOptions.ProfileFile`, as it is already used by `-fprofile-sample-use=<...>`.
  2) Makes the linker link the `compiler_rt.profile` lib (see [ToolChain.cpp#L1125-L1131](https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChain.cpp#L1125-L1131)).
- Added a flag (`--pgo-cold-instrument-entry-threshold`) to configure the entry count threshold used to determine whether a function is cold.

Overall, the full command looks like:
```
clang++ -O2 -fprofile-generate-cold-function-coverage=<...> -fprofile-sample-use=<...> code.cc -o code
```
1 parent f147437 commit e517cfc

File tree

8 files changed

+126
-2
lines changed

8 files changed

+126
-2
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,6 +1786,12 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
17861786
PosFlag<SetTrue, [], [ClangOption, CC1Option],
17871787
"Emit extra debug info to make sample profile more accurate">,
17881788
NegFlag<SetFalse>>;
1789+
def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">,
1790+
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
1791+
HelpText<"Generate instrumented code to collect coverage info for cold functions into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
1792+
def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">,
1793+
Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<directory>">,
1794+
HelpText<"Generate instrumented code to collect coverage info for cold functions into <directory>/default.profraw (overridden by LLVM_PROFILE_FILE env var)">;
17891795
def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
17901796
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
17911797
HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;

clang/lib/Driver/ToolChain.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -897,7 +897,9 @@ bool ToolChain::needsProfileRT(const ArgList &Args) {
897897
Args.hasArg(options::OPT_fprofile_instr_generate) ||
898898
Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
899899
Args.hasArg(options::OPT_fcreate_profile) ||
900-
Args.hasArg(options::OPT_forder_file_instrumentation);
900+
Args.hasArg(options::OPT_forder_file_instrumentation) ||
901+
Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage) ||
902+
Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage_EQ);
901903
}
902904

903905
bool ToolChain::needsGCovInstrumentation(const llvm::opt::ArgList &Args) {

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,26 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
632632
}
633633
}
634634

635+
if (auto *ColdFuncCoverageArg = Args.getLastArg(
636+
options::OPT_fprofile_generate_cold_function_coverage,
637+
options::OPT_fprofile_generate_cold_function_coverage_EQ)) {
638+
SmallString<128> Path(
639+
ColdFuncCoverageArg->getOption().matches(
640+
options::OPT_fprofile_generate_cold_function_coverage_EQ)
641+
? ColdFuncCoverageArg->getValue()
642+
: "");
643+
llvm::sys::path::append(Path, "default_%m.profraw");
644+
// FIXME: Ideally the file path should be passed through
645+
// `-fprofile-instrument-path=`(InstrProfileOutput), however, this field is
646+
// shared with other profile use path(see PGOOptions), we need to refactor
647+
// PGOOptions to make it work.
648+
CmdArgs.push_back("-mllvm");
649+
CmdArgs.push_back(Args.MakeArgString(
650+
Twine("--instrument-cold-function-only-path=") + Path));
651+
CmdArgs.push_back("-mllvm");
652+
CmdArgs.push_back("--pgo-function-entry-coverage");
653+
}
654+
635655
Arg *PGOGenArg = nullptr;
636656
if (PGOGenerateArg) {
637657
assert(!CSPGOGenerateArg);
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Test -fprofile-generate-cold-function-coverage
2+
3+
// RUN: rm -rf %t && split-file %s %t
4+
// RUN: %clang -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%t/pgo-cold-func.prof -S -emit-llvm -o - %t/pgo-cold-func.c | FileCheck %s
5+
6+
// CHECK: @__llvm_profile_filename = {{.*}} c"/xxx/yyy/default_%m.profraw\00"
7+
8+
// CHECK: store i8 0, ptr @__profc_bar, align 1
9+
// CHECK-NOT: @__profc_foo
10+
11+
//--- pgo-cold-func.prof
12+
foo:1:1
13+
1: 1
14+
15+
//--- pgo-cold-func.c
16+
int bar(int x) { return x;}
17+
int foo(int x) {
18+
return x;
19+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// RUN: %clang -### -c -fprofile-generate-cold-function-coverage %s 2>&1 | FileCheck %s
2+
// CHECK: "--instrument-cold-function-only-path=default_%m.profraw"
3+
// CHECK: "--pgo-function-entry-coverage"
4+
// CHECK-NOT: "-fprofile-instrument"
5+
// CHECK-NOT: "-fprofile-instrument-path=
6+
7+
// RUN: %clang -### -c -fprofile-generate-cold-function-coverage=dir %s 2>&1 | FileCheck %s --check-prefix=CHECK-EQ
8+
// CHECK-EQ: "--instrument-cold-function-only-path=dir{{/|\\\\}}default_%m.profraw"

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,12 @@ static cl::opt<bool> UseLoopVersioningLICM(
296296
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
297297
cl::desc("Enable the experimental Loop Versioning LICM pass"));
298298

299+
static cl::opt<std::string> InstrumentColdFuncOnlyPath(
300+
"instrument-cold-function-only-path", cl::init(""),
301+
cl::desc("File path for cold function only instrumentation"), cl::Hidden);
302+
299303
extern cl::opt<std::string> UseCtxProfile;
304+
extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
300305

301306
namespace llvm {
302307
extern cl::opt<bool> EnableMemProfContextDisambiguation;
@@ -1182,8 +1187,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
11821187
const bool IsCtxProfUse =
11831188
!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
11841189

1190+
// Enable cold function coverage instrumentation if
1191+
// InstrumentColdFuncOnlyPath is provided.
1192+
const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly =
1193+
IsPGOPreLink && !InstrumentColdFuncOnlyPath.empty();
1194+
11851195
if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1186-
IsCtxProfUse)
1196+
IsCtxProfUse || IsColdFuncOnlyInstrGen)
11871197
addPreInlinerPasses(MPM, Level, Phase);
11881198

11891199
// Add all the requested passes for instrumentation PGO, if requested.
@@ -1205,6 +1215,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
12051215
return MPM;
12061216
addPostPGOLoopRotation(MPM, Level);
12071217
MPM.addPass(PGOCtxProfLoweringPass());
1218+
} else if (IsColdFuncOnlyInstrGen) {
1219+
addPGOInstrPasses(
1220+
MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1221+
/* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
1222+
/* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
12081223
}
12091224

12101225
if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,20 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
319319
cl::desc("Do not instrument functions with the number of critical edges "
320320
" greater than this threshold."));
321321

322+
static cl::opt<uint64_t> PGOColdInstrumentEntryThreshold(
323+
"pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden,
324+
cl::desc("For cold function instrumentation, skip instrumenting functions "
325+
"whose entry count is above the given value."));
326+
327+
static cl::opt<bool> PGOTreatUnknownAsCold(
328+
"pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden,
329+
cl::desc("For cold function instrumentation, treat count unknown(e.g. "
330+
"unprofiled) functions as cold."));
331+
332+
cl::opt<bool> PGOInstrumentColdFunctionOnly(
333+
"pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
334+
cl::desc("Enable cold function only instrumentation."));
335+
322336
extern cl::opt<unsigned> MaxNumVTableAnnotations;
323337

324338
namespace llvm {
@@ -1897,6 +1911,11 @@ static bool skipPGOGen(const Function &F) {
18971911
return true;
18981912
if (F.getInstructionCount() < PGOFunctionSizeThreshold)
18991913
return true;
1914+
if (PGOInstrumentColdFunctionOnly) {
1915+
if (auto EntryCount = F.getEntryCount())
1916+
return EntryCount->getCount() > PGOColdInstrumentEntryThreshold;
1917+
return !PGOTreatUnknownAsCold;
1918+
}
19001919
return false;
19011920
}
19021921

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; RUN: opt < %s --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -S | FileCheck --check-prefixes=COLD %s
2+
; RUN: opt < %s --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -pgo-cold-instrument-entry-threshold=1 -S | FileCheck --check-prefixes=ENTRY-COUNT %s
3+
; RUN: opt < %s --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -pgo-treat-unknown-as-cold -S | FileCheck --check-prefixes=UNKNOWN-FUNC %s
4+
5+
; COLD: call void @llvm.instrprof.cover(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
6+
; COLD-NOT: __profn_main
7+
; COLD-NOT: __profn_bar
8+
9+
; ENTRY-COUNT: call void @llvm.instrprof.cover(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
10+
; ENTRY-COUNT: call void @llvm.instrprof.cover(ptr @__profn_main, i64 [[#]], i32 1, i32 0)
11+
12+
; UNKNOWN-FUNC: call void @llvm.instrprof.cover(ptr @__profn_bar, i64 [[#]], i32 1, i32 0)
13+
; UNKNOWN-FUNC: call void @llvm.instrprof.cover(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
14+
15+
16+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
17+
target triple = "x86_64-unknown-linux-gnu"
18+
19+
define void @bar() {
20+
entry:
21+
ret void
22+
}
23+
24+
define void @foo() !prof !0 {
25+
entry:
26+
ret void
27+
}
28+
29+
define i32 @main() !prof !1 {
30+
entry:
31+
ret i32 0
32+
}
33+
34+
!0 = !{!"function_entry_count", i64 0}
35+
!1 = !{!"function_entry_count", i64 1}

0 commit comments

Comments
 (0)