Skip to content

Commit 97dc508

Browse files
authored
[BOLT] Match functions with name similarity (#95884)
A mapping from namespace to associated binary functions is used to match function profiles to binary functions, based on the edit distance threshold set by the '--name-similarity-function-matching-threshold' flag. The flag is set to 0 (exact name matching only) by default because similarity matching is expensive, requiring the processing of all BFs. Test Plan: Added name-similarity-function-matching.test. On a binary with 5M functions, rewrite passes took ~520s without the flag and ~2018s with the flag set to 20.
1 parent 4eecf3c commit 97dc508

File tree

4 files changed

+191
-0
lines changed

4 files changed

+191
-0
lines changed

bolt/docs/CommandLineArgumentReference.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,10 @@
688688

689689
Use a modified clustering algorithm geared towards minimizing branches
690690

691+
- `--name-similarity-function-matching-threshold=<uint>`
692+
693+
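Match profiled functions to binary functions in the same namespace whose demangled names are within the given edit distance. The default value of 0 disables name-similarity matching.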
694+
691695
- `--no-inline`
692696

693697
Disable all inlining (overrides other inlining options)

bolt/include/bolt/Profile/YAMLProfileReader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ class YAMLProfileReader : public ProfileReaderBase {
9393
ProfiledFunctions.emplace(&BF);
9494
}
9595

96+
/// Matches profiled functions that are still unmatched to binary functions
/// with similarly named (by edit distance on demangled names) candidates.
/// Returns the number of profiles matched this way.
97+
uint64_t matchWithNameSimilarity(BinaryContext &BC);
98+
9699
/// Check if the profile uses an event with a given \p Name.
97100
bool usesEvent(StringRef Name) const;
98101
};

bolt/lib/Profile/YAMLProfileReader.cpp

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@
1111
#include "bolt/Core/BinaryFunction.h"
1212
#include "bolt/Passes/MCF.h"
1313
#include "bolt/Profile/ProfileYAMLMapping.h"
14+
#include "bolt/Utils/NameResolver.h"
1415
#include "bolt/Utils/Utils.h"
1516
#include "llvm/ADT/STLExtras.h"
17+
#include "llvm/ADT/edit_distance.h"
18+
#include "llvm/Demangle/Demangle.h"
1619
#include "llvm/Support/CommandLine.h"
#include <cstdlib>
1720

1821
using namespace llvm;
@@ -24,6 +27,11 @@ extern cl::OptionCategory BoltOptCategory;
2427
extern cl::opt<bool> InferStaleProfile;
2528
extern cl::opt<bool> Lite;
2629

30+
/// Maximum edit distance allowed between a profiled function's demangled name
/// and a binary function's demangled name for the two to be matched by name
/// similarity. Defaults to 0, in which case readProfile() does not run
/// name-similarity matching at all.
cl::opt<unsigned> NameSimilarityFunctionMatchingThreshold(
31+
"name-similarity-function-matching-threshold",
32+
cl::desc("Match functions using namespace and edit distance"), cl::init(0),
33+
cl::Hidden, cl::cat(BoltOptCategory));
34+
2735
static llvm::cl::opt<bool>
2836
IgnoreHash("profile-ignore-hash",
2937
cl::desc("ignore hash while reading function profile"),
@@ -350,6 +358,111 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
350358
return false;
351359
}
352360

361+
/// Matches profiled functions that have not been matched yet to binary
/// functions with similar demangled names.
///
/// Candidates are bucketed by the namespace derived from their demangled name
/// and must have the same number of basic blocks as the profiled function. For
/// each unused profile, the closest not-yet-profiled candidate in its bucket is
/// matched if its edit distance does not exceed
/// opts::NameSimilarityFunctionMatchingThreshold.
///
/// \param BC binary context providing the binary functions to match against.
/// \returns the number of profiles matched by this routine.
uint64_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
  uint64_t MatchedWithNameSimilarity = 0;
  ItaniumPartialDemangler Demangler;
  const unsigned Threshold = opts::NameSimilarityFunctionMatchingThreshold;

  // Restores the original symbol name and demangles it.
  auto DemangleName = [](const std::string &FunctionName) {
    StringRef RestoredName = NameResolver::restore(FunctionName);
    return demangle(RestoredName);
  };

  // Derives the enclosing declaration context (namespace) of a function name.
  // Returns an empty string whenever the context cannot be derived.
  //
  // NOTE(review): partialDemangle() appears to expect an Itanium-mangled name,
  // while callers pass an already demangled one; if so, most names end up in
  // the empty-namespace bucket. Confirm against the demangler documentation.
  auto DeriveNameSpace = [&Demangler](const std::string &DemangledName) {
    // A true result signals that the name could not be parsed.
    if (Demangler.partialDemangle(DemangledName.c_str()))
      return std::string();
    // Let the demangler allocate the output buffer: it may realloc() whatever
    // buffer it is given, so memory owned by a container must not be passed,
    // and the size argument is read on entry when a buffer is supplied.
    size_t BufferSize = 0;
    char *NameSpace = Demangler.getFunctionDeclContextName(nullptr, &BufferSize);
    // The parsed name may not be a function at all; there is no context then.
    if (!NameSpace)
      return std::string();
    // Copy up to the NUL terminator so it does not become part of the key.
    std::string Result(NameSpace);
    std::free(NameSpace);
    return Result;
  };

  // Maps namespaces to the block counts of their profiled functions, and
  // caches each profiled function's demangled name and namespace (index-aligned
  // with YamlBP.Functions) to minimize the number of BFs to process and avoid
  // repeated name demangling/namespace derivation.
  StringMap<std::set<uint32_t>> NamespaceToProfiledBFSizes;
  std::vector<std::string> ProfileBFDemangledNames;
  ProfileBFDemangledNames.reserve(YamlBP.Functions.size());
  std::vector<std::string> ProfiledBFNamespaces;
  ProfiledBFNamespaces.reserve(YamlBP.Functions.size());

  for (auto &YamlBF : YamlBP.Functions) {
    ProfileBFDemangledNames.push_back(DemangleName(YamlBF.Name));
    ProfiledBFNamespaces.push_back(
        DeriveNameSpace(ProfileBFDemangledNames.back()));
    NamespaceToProfiledBFSizes[ProfiledBFNamespaces.back()].insert(
        YamlBF.NumBasicBlocks);
  }

  // Maps namespaces to BFs, excluding binary functions with no equal sized
  // profiled functions belonging to the same namespace.
  StringMap<std::vector<BinaryFunction *>> NamespaceToBFs;
  for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
    const std::string Namespace = DeriveNameSpace(BF->getDemangledName());

    auto ProfiledSizesIt = NamespaceToProfiledBFSizes.find(Namespace);
    // Skip if there are no profiled functions in this namespace.
    if (ProfiledSizesIt == NamespaceToProfiledBFSizes.end())
      continue;
    // Skip if no profiled function in this namespace has an equal number of
    // blocks.
    if (ProfiledSizesIt->second.count(BF->size()) == 0)
      continue;
    NamespaceToBFs[Namespace].push_back(BF);
  }

  // Iterates through all profiled functions and binary functions belonging to
  // the same namespace and matches based on edit distance threshold.
  assert(YamlBP.Functions.size() == ProfiledBFNamespaces.size() &&
         ProfiledBFNamespaces.size() == ProfileBFDemangledNames.size());
  for (size_t I = 0; I < YamlBP.Functions.size(); ++I) {
    yaml::bolt::BinaryFunctionProfile &YamlBF = YamlBP.Functions[I];
    if (YamlBF.Used)
      continue;
    // Skip if there are no candidate BFs in the profile's namespace.
    auto CandidatesIt = NamespaceToBFs.find(ProfiledBFNamespaces[I]);
    if (CandidatesIt == NamespaceToBFs.end())
      continue;

    const std::string &YamlBFDemangledName = ProfileBFDemangledNames[I];
    unsigned MinEditDistance = UINT_MAX;
    BinaryFunction *ClosestNameBF = nullptr;

    // Determines the BF closest to the profiled function within the same
    // namespace. The candidate list is iterated in place rather than copied.
    for (BinaryFunction *BF : CandidatesIt->second) {
      if (ProfiledFunctions.count(BF))
        continue;
      if (BF->size() != YamlBF.NumBasicBlocks)
        continue;
      const std::string BFDemangledName = BF->getDemangledName();
      // Bounding the computation by the threshold lets edit_distance() bail
      // out early; any distance above the threshold is rejected below anyway.
      const unsigned BFEditDistance =
          StringRef(BFDemangledName)
              .edit_distance(YamlBFDemangledName, /*AllowReplacements=*/true,
                             /*MaxEditDistance=*/Threshold);
      if (BFEditDistance < MinEditDistance) {
        MinEditDistance = BFEditDistance;
        ClosestNameBF = BF;
      }
    }

    if (ClosestNameBF && MinEditDistance <= Threshold) {
      matchProfileToFunction(YamlBF, *ClosestNameBF);
      ++MatchedWithNameSimilarity;
    }
  }

  return MatchedWithNameSimilarity;
}
465+
353466
Error YAMLProfileReader::readProfile(BinaryContext &BC) {
354467
if (opts::Verbosity >= 1) {
355468
outs() << "BOLT-INFO: YAML profile with hash: ";
@@ -461,6 +574,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
461574
if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
462575
matchProfileToFunction(YamlBF, *BF);
463576

577+
// Uses name similarity to match functions that were not matched by name.
578+
uint64_t MatchedWithNameSimilarity =
579+
opts::NameSimilarityFunctionMatchingThreshold > 0
580+
? matchWithNameSimilarity(BC)
581+
: 0;
582+
464583
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
465584
if (!YamlBF.Used && opts::Verbosity >= 1)
466585
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
@@ -473,6 +592,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
473592
<< " functions with hash\n";
474593
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
475594
<< " functions with matching LTO common names\n";
595+
outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
596+
<< " functions with similar names\n";
476597
}
477598

478599
// Set for parseFunctionProfile().
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
## Tests function matching in YAMLProfileReader by name similarity.
2+
3+
# REQUIRES: system-linux
4+
# RUN: split-file %s %t
5+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
6+
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
7+
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
8+
# RUN: --print-cfg --name-similarity-function-matching-threshold=1 --funcs=main --profile-ignore-hash=0 2>&1 | FileCheck %s
9+
10+
# CHECK: BOLT-INFO: matched 1 functions with similar names
11+
12+
#--- main.s
13+
.globl main
14+
.type main, @function
15+
main:
16+
.cfi_startproc
17+
.LBB00:
18+
pushq %rbp
19+
movq %rsp, %rbp
20+
subq $16, %rsp
21+
testq %rax, %rax
22+
js .LBB03
23+
.LBB01:
24+
jne .LBB04
25+
.LBB02:
26+
nop
27+
.LBB03:
28+
xorl %eax, %eax
29+
addq $16, %rsp
30+
popq %rbp
31+
retq
32+
.LBB04:
33+
xorl %eax, %eax
34+
addq $16, %rsp
35+
popq %rbp
36+
retq
37+
## For relocations against .text
38+
.reloc 0, R_X86_64_NONE
39+
.cfi_endproc
40+
.size main, .-main
41+
42+
#--- yaml
43+
---
44+
header:
45+
profile-version: 1
46+
binary-name: 'hashing-based-function-matching.s.tmp.exe'
47+
binary-build-id: '<unknown>'
48+
profile-flags: [ lbr ]
49+
profile-origin: branch profile reader
50+
profile-events: ''
51+
dfs-order: false
52+
hash-func: xxh3
53+
functions:
54+
- name: main2
55+
fid: 0
56+
hash: 0x0000000000000001
57+
exec: 1
58+
nblocks: 5
59+
blocks:
60+
- bid: 1
61+
insns: 1
62+
succ: [ { bid: 3, cnt: 1} ]
63+
...

0 commit comments

Comments
 (0)