Skip to content

Commit 2b8649f

Browse files
authored
Added feature in llvm-profdata merge to filter functions from the profile (#78378)
`--function=<regex>`: include only functions matching the regex in the output. `--no-function=<regex>`: exclude functions matching the regex from the output. If both are specified and a function name matches both filters, `--no-function` takes precedence.
1 parent 3a9ff32 commit 2b8649f

File tree

4 files changed

+151
-3
lines changed

4 files changed

+151
-3
lines changed

llvm/docs/CommandGuide/llvm-profdata.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,16 @@ OPTIONS
217217
The maximum number of functions in a single temporal profile trace. Longer
218218
traces will be truncated. The default value is 10000.
219219

220+
.. option:: --function=<string>
221+
222+
Only keep functions matching the regex in the output; all others are erased
223+
from the profile.
224+
225+
.. option:: --no-function=<string>
226+
227+
Remove functions matching the regex from the profile. If both --function and
228+
--no-function are specified and a function matches both, it is removed.
229+
220230
EXAMPLES
221231
^^^^^^^^
222232
Basic Usage

llvm/include/llvm/ProfileData/SampleProf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,8 @@ class SampleProfileMap
13301330
}
13311331

13321332
// Erase by key: forwards to base_type::erase(Key) and returns its result
// (presumably the number of entries removed -- confirm against base_type).
size_t erase(const key_type &Key) { return base_type::erase(Key); }
1333+
1334+
// Erase by position: forwards to base_type::erase(It) and hands back the
// iterator that call returns, so callers can remove entries while iterating.
iterator erase(iterator It) { return base_type::erase(It); }
13331335
};
13341336

13351337
using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>;
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
Test llvm-profdata merge with function filters.
2+
3+
RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --function="_Z3.*" | FileCheck %s --check-prefix=CHECK-FILTER1
4+
RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --no-function="main" | FileCheck %s --check-prefix=CHECK-FILTER1
5+
CHECK-FILTER1: _Z3bari:20301:1437
6+
CHECK-NEXT: 1: 1437
7+
CHECK-NEXT: _Z3fooi:7711:610
8+
CHECK-NEXT: 1: 610
9+
CHECK-NOT: main
10+
11+
RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --function="_Z3.*" --no-function="fooi$" | FileCheck %s --check-prefix=CHECK-FILTER2
12+
CHECK-FILTER2: _Z3bari:20301:1437
13+
CHECK-NEXT: 1: 1437
14+
CHECK-NOT: main
15+
CHECK-NOT: _Z3fooi
16+
17+
RUN: llvm-profdata merge --sample --extbinary --use-md5 -output=%t.0.profdata %p/Inputs/sample-profile.proftext
18+
RUN: llvm-profdata merge --sample %t.0.profdata --text --function="_Z3fooi" | FileCheck %s --check-prefix=CHECK-FILTER-MD5
19+
CHECK-FILTER-MD5: 1228452328526475178:7711:610
20+
CHECK-NEXT: 1: 610
21+
CHECK-NOT: 15822663052811949562
22+
CHECK-NOT: 3727899762981752933
23+
24+
RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --function="foo" | FileCheck %s --check-prefix=CHECK-FILTER3
25+
RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --no-function="main" | FileCheck %s --check-prefix=CHECK-FILTER3
26+
CHECK-FILTER3: foo
27+
CHECK-NEXT: # Func Hash:
28+
CHECK-NEXT: 10
29+
CHECK-NEXT: # Num Counters:
30+
CHECK-NEXT: 2
31+
CHECK-NEXT: # Counter Values:
32+
CHECK-NEXT: 499500
33+
CHECK-NEXT: 179900
34+
CHECK-NEXT:
35+
CHECK-NEXT: foo2
36+
CHECK-NEXT: # Func Hash:
37+
CHECK-NEXT: 10
38+
CHECK-NEXT: # Num Counters:
39+
CHECK-NEXT: 2
40+
CHECK-NEXT: # Counter Values:
41+
CHECK-NEXT: 500500
42+
CHECK-NEXT: 180100
43+
44+
RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --function="foo" --no-function="^foo$" | FileCheck %s --check-prefix=CHECK-FILTER4
45+
CHECK-FILTER4: foo2
46+
CHECK-NEXT: # Func Hash:
47+
CHECK-NEXT: 10
48+
CHECK-NEXT: # Num Counters:
49+
CHECK-NEXT: 2
50+
CHECK-NEXT: # Counter Values:
51+
CHECK-NEXT: 500500
52+
CHECK-NEXT: 180100
53+
54+
RUN: llvm-profdata merge --sample %p/Inputs/cs-sample.proftext --text --function="main.*@.*_Z5funcBi" | FileCheck %s --check-prefix=CHECK-FILTER5
55+
CHECK-FILTER5: [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20
56+
CHECK-NEXT: 0: 15
57+
CHECK-NEXT: 1: 15
58+
CHECK-NEXT: 3: 74946
59+
CHECK-NEXT: 4: 74941 _Z3fibi:82359
60+
CHECK-NEXT: 10: 23324
61+
CHECK-NEXT: 11: 23327 _Z3fibi:25228
62+
CHECK-NEXT: 15: 11
63+
CHECK-NEXT: !Attributes: 1
64+
CHECK-NEXT: [main:3.1 @ _Z5funcBi]:120:19
65+
CHECK-NEXT: 0: 19
66+
CHECK-NEXT: 1: 19 _Z8funcLeafi:20
67+
CHECK-NEXT: 3: 12
68+
CHECK-NEXT: !Attributes: 1
69+

llvm/tools/llvm-profdata/llvm-profdata.cpp

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/Support/MD5.h"
3535
#include "llvm/Support/MemoryBuffer.h"
3636
#include "llvm/Support/Path.h"
37+
#include "llvm/Support/Regex.h"
3738
#include "llvm/Support/ThreadPool.h"
3839
#include "llvm/Support/Threading.h"
3940
#include "llvm/Support/VirtualFileSystem.h"
@@ -131,9 +132,11 @@ cl::opt<std::string>
131132
cl::sub(MergeSubcommand));
132133
// "--function" option. After this change it is registered for the show,
// overlap and merge subcommands; for merge, filterFunctions() compiles the
// value as a regex and keeps only the functions whose name matches it.
// (This hunk lists both the removed and the added form of the definition.)
cl::opt<std::string> FuncNameFilter(
133134
"function",
134-
cl::desc("Details for matching functions. For overlapping CSSPGO, this "
135-
"takes a function name with calling context."),
136-
cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand));
135+
cl::desc("Only functions matching the filter are shown in the output. For "
136+
"overlapping CSSPGO, this takes a function name with calling "
137+
"context."),
138+
cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
139+
cl::sub(MergeSubcommand));
137140

138141
// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
139142
// factor out the common cl::sub in cl::opt constructor for subcommand-specific
@@ -243,6 +246,10 @@ cl::opt<uint64_t> TemporalProfMaxTraceLength(
243246
cl::sub(MergeSubcommand),
244247
cl::desc("The maximum length of a single temporal profile trace "
245248
"(default: 10000)"));
249+
// "--no-function" option (merge subcommand only): a regex naming the functions
// that filterFunctions() removes from the merged profile. Empty by default,
// which disables negative filtering.
cl::opt<std::string> FuncNameNegativeFilter(
    "no-function",
    cl::desc("Exclude functions matching the filter from the output."),
    cl::sub(MergeSubcommand), cl::init(""));
246253

247254
cl::opt<FailureMode>
248255
FailMode("failure-mode", cl::init(failIfAnyAreInvalid),
@@ -759,6 +766,62 @@ static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
759766
});
760767
}
761768

769+
/// Name used for filtering an instrumentation-profile entry: the key of the
/// StringMap entry \p Val.
static StringRef
getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
  return Val.getKey();
}
773+
774+
static std::string
775+
getFuncName(const SampleProfileMap::value_type &Val) {
776+
return Val.second.getContext().toString();
777+
}
778+
779+
template <typename T>
780+
static void filterFunctions(T &ProfileMap) {
781+
bool hasFilter = !FuncNameFilter.empty();
782+
bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
783+
if (!hasFilter && !hasNegativeFilter)
784+
return;
785+
786+
// If filter starts with '?' it is MSVC mangled name, not a regex.
787+
llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
788+
if (hasFilter && FuncNameFilter[0] == '?' &&
789+
ProbablyMSVCMangledName.match(FuncNameFilter))
790+
FuncNameFilter = llvm::Regex::escape(FuncNameFilter);
791+
if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
792+
ProbablyMSVCMangledName.match(FuncNameNegativeFilter))
793+
FuncNameNegativeFilter = llvm::Regex::escape(FuncNameNegativeFilter);
794+
795+
size_t Count = ProfileMap.size();
796+
llvm::Regex Pattern(FuncNameFilter);
797+
llvm::Regex NegativePattern(FuncNameNegativeFilter);
798+
std::string Error;
799+
if (hasFilter && !Pattern.isValid(Error))
800+
exitWithError(Error);
801+
if (hasNegativeFilter && !NegativePattern.isValid(Error))
802+
exitWithError(Error);
803+
804+
// Handle MD5 profile, so it is still able to match using the original name.
805+
std::string MD5Name = std::to_string(llvm::MD5Hash(FuncNameFilter));
806+
std::string NegativeMD5Name =
807+
std::to_string(llvm::MD5Hash(FuncNameNegativeFilter));
808+
809+
for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
810+
auto Tmp = I++;
811+
const auto &FuncName = getFuncName(*Tmp);
812+
// Negative filter has higher precedence than positive filter.
813+
if ((hasNegativeFilter &&
814+
(NegativePattern.match(FuncName) ||
815+
(FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
816+
(hasFilter && !(Pattern.match(FuncName) ||
817+
(FunctionSamples::UseMD5 && MD5Name == FuncName))))
818+
ProfileMap.erase(Tmp);
819+
}
820+
821+
llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
822+
<< "in the original profile are filtered.\n";
823+
}
824+
762825
static void writeInstrProfile(StringRef OutputFilename,
763826
ProfileFormat OutputFormat,
764827
InstrProfWriter &Writer) {
@@ -878,6 +941,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
878941
(NumErrors > 0 && FailMode == failIfAnyAreInvalid))
879942
exitWithError("no profile can be merged");
880943

944+
filterFunctions(Contexts[0]->Writer.getProfileData());
945+
881946
writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
882947
}
883948

@@ -1459,6 +1524,8 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
14591524
ProfileIsCS = FunctionSamples::ProfileIsCS = false;
14601525
}
14611526

1527+
filterFunctions(ProfileMap);
1528+
14621529
auto WriterOrErr =
14631530
SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
14641531
if (std::error_code EC = WriterOrErr.getError())

0 commit comments

Comments
 (0)