Skip to content

Commit ac1a1e5

Browse files
[ThinLTO][TypeProf] Import local-linkage global var for mod1:func_foo-> mod2:local-var edge (#100448)
VTable value profiling can create reference edges from `mod1:func_foo` to `mod2:local-vtable`. Indirect call profiling can create reference edges from `mod1:func_foo` to `mod2:local_func_bar`. Given a ref chain `mod1:func_foo -> mod2:local-var`,`local-var` doesn't get imported by default. Compiler checks / requires the module of 'local-var' is the same as the function that referenced it(`mod1:func_foo`). This is to prevent mis-compilation when both `mod1` and `mod2` has `local-var` of the same name, and cpp files are compiled without full path. This patch allows the import when one of the following conditions happen: 1) Introduce an option `import-assume-local-unique`. When the compiler user can guarantee that all files are compiled with full paths, they can set this option. 2) When there is one instance of value summary. Test: * A/B testing this option alone gives -0.16% statistically consistent cpu cycle reduction on one search workload (no throughput increase) * Testing it together with existing more-efficient ICP bumps the throughput increase by a margin (0.05%~0.1%) * No regressions observed.
1 parent 2ba1aee commit ac1a1e5

File tree

2 files changed

+85
-51
lines changed

2 files changed

+85
-51
lines changed

compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
// ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported
66
// UNSUPPORTED: ppc && host-byteorder-big-endian
77

8-
// RUN: rm -rf %t && mkdir %t && cd %t
8+
// RUN: rm -rf %t && mkdir %t && split-file %s %t && cd %t
99

10-
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test
10+
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling lib.cpp main.cpp -o test
1111
// RUN: env LLVM_PROFILE_FILE=test.profraw ./test
1212

1313
// Show vtable profiles from raw profile.
@@ -37,23 +37,23 @@
3737
// COMMON-NEXT: Number of instrumented vtables: 2
3838
// RAW: Indirect Target Results:
3939
// RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
40-
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
40+
// RAW-NEXT: [ 0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
4141
// RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
42-
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
42+
// RAW-NEXT: [ 1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
4343
// RAW-NEXT: VTable Results:
4444
// RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
45-
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
45+
// RAW-NEXT: [ 0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
4646
// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
47-
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
47+
// RAW-NEXT: [ 1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
4848
// INDEXED: Indirect Target Results:
49-
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
49+
// INDEXED-NEXT: [ 0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
5050
// INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
51-
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
51+
// INDEXED-NEXT: [ 1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
5252
// INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
5353
// INDEXED-NEXT: VTable Results:
54-
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
54+
// INDEXED-NEXT: [ 0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
5555
// INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
56-
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
56+
// INDEXED-NEXT: [ 1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
5757
// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
5858
// COMMON: Instrumentation level: IR entry_first = 0
5959
// COMMON-NEXT: Functions shown: 1
@@ -93,27 +93,27 @@
9393
// ICTEXT: # NumValueSites:
9494
// ICTEXT: 2
9595
// ICTEXT: 2
96-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
96+
// ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
9797
// ICTEXT: _ZN8Derived14funcEii:50
9898
// ICTEXT: 2
99-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
99+
// ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
100100
// ICTEXT: _ZN8Derived1D0Ev:250
101101
// ICTEXT: # ValueKind = IPVK_VTableTarget:
102102
// ICTEXT: 2
103103
// ICTEXT: # NumValueSites:
104104
// ICTEXT: 2
105105
// ICTEXT: 2
106-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
106+
// ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
107107
// ICTEXT: _ZTV8Derived1:50
108108
// ICTEXT: 2
109-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
109+
// ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
110110
// ICTEXT: _ZTV8Derived1:250
111111

112112
// When vtable value profiles exist, pgo-instr-use pass should annotate them
113113
// even if `-enable-vtable-value-profiling` is not explicitly on.
114114
// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \
115115
// RUN: -mllvm -print-after=pgo-instr-use -mllvm -filter-print-funcs=main \
116-
// RUN: -mllvm -print-module-scope %s 2>&1 | FileCheck %s --check-prefix=ANNOTATE
116+
// RUN: -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | FileCheck %s --check-prefix=ANNOTATE
117117

118118
// ANNOTATE-NOT: Inconsistent number of value sites
119119
// ANNOTATE: !{!"VP", i32 2
@@ -122,7 +122,7 @@
122122
// if `-icp-max-num-vtables` is set to zero.
123123
// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \
124124
// RUN: -mllvm -icp-max-num-vtables=0 -mllvm -print-after=pgo-instr-use \
125-
// RUN: -mllvm -filter-print-funcs=main -mllvm -print-module-scope %s 2>&1 | \
125+
// RUN: -mllvm -filter-print-funcs=main -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | \
126126
// RUN: FileCheck %s --check-prefix=OMIT
127127

128128
// OMIT: Inconsistent number of value sites
@@ -141,28 +141,29 @@
141141
// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \
142142
// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \
143143
// RUN: -mllvm -enable-vtable-profile-use \
144+
// RUN: -Wl,-plugin-opt,-import-assume-unique-local \
144145
// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \
145146
// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \
146-
// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \
147+
// RUN: -Wl,-mllvm,-filter-print-funcs=main lib.cpp main.cpp 2>&1 \
147148
// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP"
148149

149150
// For the indirect call site `ptr->func`
150-
// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
151-
// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
151+
// REMARK: main.cpp:10:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}} with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}}
152+
// REMARK: main.cpp:10:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
152153
//
153154
// For the indirect call site `delete ptr`
154-
// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
155-
// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
155+
// REMARK: main.cpp:12:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev.llvm.{{.*}} with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}}
156+
// REMARK: main.cpp:12:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
156157

157158
// The IR matchers for indirect callsite `ptr->func`.
158159
// IR-LABEL: @main
159160
// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei
160161
// IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]]
161-
// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16)
162+
// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}, i32 16)
162163
// IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]],
163164
//
164165
// IR: [[BB1]]:
165-
// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii
166+
// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}}
166167
// IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]]
167168
//
168169
// IR: [[BB2]]:
@@ -185,6 +186,7 @@
185186
// IR: [[MERGE0]]:
186187
// IR: [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ]
187188

189+
//--- lib.h
188190
#include <stdio.h>
189191
#include <stdlib.h>
190192
class Base {
@@ -193,12 +195,19 @@ class Base {
193195

194196
virtual ~Base() {};
195197
};
198+
196199
class Derived1 : public Base {
197200
public:
198-
int func(int a, int b) override { return a * b; }
201+
int func(int a, int b) override;
199202

200203
~Derived1() {}
201204
};
205+
206+
__attribute__((noinline)) Base *createType(int a);
207+
208+
//--- lib.cpp
209+
#include "lib.h"
210+
202211
namespace {
203212
class Derived2 : public Base {
204213
public:
@@ -207,7 +216,10 @@ class Derived2 : public Base {
207216
~Derived2() {}
208217
};
209218
} // namespace
210-
__attribute__((noinline)) Base *createType(int a) {
219+
220+
int Derived1::func(int a, int b) { return a * b; }
221+
222+
Base *createType(int a) {
211223
Base *base = nullptr;
212224
if (a % 4 == 0)
213225
base = new Derived1();
@@ -216,6 +228,9 @@ __attribute__((noinline)) Base *createType(int a) {
216228
return base;
217229
}
218230

231+
//--- main.cpp
232+
#include "lib.h"
233+
219234
int main(int argc, char **argv) {
220235
int sum = 0;
221236
for (int i = 0; i < 1000; i++) {

llvm/lib/Transforms/IPO/FunctionImport.cpp

Lines changed: 45 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,17 @@ static cl::opt<std::string> WorkloadDefinitions(
174174
"}"),
175175
cl::Hidden);
176176

177+
static cl::opt<bool> ImportAssumeUniqueLocal(
178+
"import-assume-unique-local", cl::init(false),
179+
cl::desc(
180+
"By default, a local-linkage global variable won't be imported in the "
181+
"edge mod1:func -> mod2:local-var (from value profiles) since compiler "
182+
"cannot assume mod2 is compiled with full path which gives local-var a "
183+
"program-wide unique GUID. Set this option to true will help cross "
184+
"module import of such variables. This is only safe if the compiler "
185+
"user specify the full module path."),
186+
cl::Hidden);
187+
177188
namespace llvm {
178189
extern cl::opt<bool> EnableMemProfContextDisambiguation;
179190
}
@@ -196,6 +207,23 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
196207
return Result;
197208
}
198209

210+
static bool shouldSkipLocalInAnotherModule(const GlobalVarSummary *RefSummary,
211+
size_t NumDefs,
212+
StringRef ImporterModule) {
213+
// We can import a local from another module if all inputs are compiled
214+
// with full paths or when there is one definition.
215+
if (ImportAssumeUniqueLocal || NumDefs == 1)
216+
return false;
217+
// In other cases, make sure we import the copy in the caller's module if the
218+
// referenced value has local linkage. The only time a local variable can
219+
// share an entry in the index is if there is a local with the same name in
220+
// another module that had the same source file name (in a different
221+
// directory), where each was compiled in their own directory so there was not
222+
// distinguishing path.
223+
return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
224+
RefSummary->modulePath() != ImporterModule;
225+
}
226+
199227
/// Given a list of possible callee implementation for a call site, qualify the
200228
/// legality of importing each. The return is a range of pairs. Each pair
201229
/// corresponds to a candidate. The first value is the ImportFailureReason for
@@ -228,19 +256,21 @@ static auto qualifyCalleeCandidates(
228256
if (!Summary)
229257
return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
230258

231-
// If this is a local function, make sure we import the copy
232-
// in the caller's module. The only time a local function can
233-
// share an entry in the index is if there is a local with the same name
234-
// in another module that had the same source file name (in a different
235-
// directory), where each was compiled in their own directory so there
236-
// was not distinguishing path.
237-
// However, do the import from another module if there is only one
238-
// entry in the list - in that case this must be a reference due
239-
// to indirect call profile data, since a function pointer can point to
240-
// a local in another module.
241-
if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
242-
CalleeSummaryList.size() > 1 &&
243-
Summary->modulePath() != CallerModulePath)
259+
// If this is a local function, make sure we import the copy in the
260+
// caller's module. The only time a local function can share an entry in
261+
// the index is if there is a local with the same name in another module
262+
// that had the same source file name (in a different directory), where
263+
// each was compiled in their own directory so there was not
264+
// distinguishing path.
265+
// If the local function is from another module, it must be a reference
266+
// due to indirect call profile data since a function pointer can point
267+
// to a local in another module. Do the import from another module if
268+
// there is only one entry in the list or when all files in the program
269+
// are compiled with full path - in both cases the local function has
270+
// unique PGO name and GUID.
271+
if (shouldSkipLocalInAnotherModule(dyn_cast<GlobalVarSummary>(Summary),
272+
CalleeSummaryList.size(),
273+
CallerModulePath))
244274
return {
245275
FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
246276
GVSummary};
@@ -359,18 +389,6 @@ class GlobalsImporter final {
359389

360390
LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
361391

362-
// If this is a local variable, make sure we import the copy
363-
// in the caller's module. The only time a local variable can
364-
// share an entry in the index is if there is a local with the same name
365-
// in another module that had the same source file name (in a different
366-
// directory), where each was compiled in their own directory so there
367-
// was not distinguishing path.
368-
auto LocalNotInModule =
369-
[&](const GlobalValueSummary *RefSummary) -> bool {
370-
return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
371-
RefSummary->modulePath() != Summary.modulePath();
372-
};
373-
374392
for (const auto &RefSummary : VI.getSummaryList()) {
375393
const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
376394
// Functions could be referenced by global vars - e.g. a vtable; but we
@@ -379,7 +397,8 @@ class GlobalsImporter final {
379397
// based on profile information). Should we decide to handle them here,
380398
// we can refactor accordingly at that time.
381399
if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
382-
LocalNotInModule(GVS))
400+
shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
401+
Summary.modulePath()))
383402
continue;
384403

385404
// If there isn't an entry for GUID, insert <GUID, Definition> pair.

0 commit comments

Comments
 (0)