Skip to content

Commit b38773b

Browse files
mtrofin authored and ronlieb committed
[ctx_prof] Remove the dependency on the "name" GlobalVariable (llvm#105731)
We don't need that name variable for contextual instrumentation: we just use the function to get its GUID, which we pass to the runtime, and we rely on metadata to carry that GUID through the various optimization passes. This change removes the need for the name global variable. Change-Id: I06e95c9a9c74daa1529f8282227e868e55802a6d
1 parent 9c6866b commit b38773b

File tree

5 files changed

+95
-57
lines changed

5 files changed

+95
-57
lines changed

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1578,8 +1578,6 @@ class InstrProfInstBase : public IntrinsicInst {
15781578
return const_cast<Value *>(getArgOperand(0))->stripPointerCasts();
15791579
}
15801580

1581-
void setNameValue(Value *V) { setArgOperand(0, V); }
1582-
15831581
// The hash of the CFG for the instrumented function.
15841582
ConstantInt *getHash() const {
15851583
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));

llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,8 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
226226

227227
IRBuilder<> Builder(Mark);
228228

229-
Guid = Builder.getInt64(AssignGUIDPass::getGUID(F));
229+
Guid = Builder.getInt64(
230+
AssignGUIDPass::getGUID(cast<Function>(*Mark->getNameValue())));
230231
// The type of the context of this function is now knowable since we have
231232
// NumCallsites and NumCounters. We declare it here because it's more
232233
// convenient - we have the Builder.

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -464,7 +464,7 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
464464
VisitMode Mode = VM_counting; // Visiting mode.
465465
unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
466466
unsigned TotalNumCtrs = 0; // Total number of counters
467-
GlobalVariable *FuncNameVar = nullptr;
467+
GlobalValue *FuncNameVar = nullptr;
468468
uint64_t FuncHash = 0;
469469
PGOUseFunc *UseFunc = nullptr;
470470
bool HasSingleByteCoverage;
@@ -482,7 +482,7 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
482482
// Ind is a pointer to the counter index variable; \p TotalNC
483483
// is the total number of counters; \p FNV is the pointer to the
484484
// PGO function name var; \p FHash is the function hash.
485-
void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
485+
void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV,
486486
uint64_t FHash) {
487487
Mode = VM_instrument;
488488
CurCtrIdx = Ind;
@@ -901,13 +901,14 @@ void FunctionInstrumenter::instrument() {
901901
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
902902
}
903903

904+
const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
904905
FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
905-
F, TLI, ComdatMembers, true, BPI, BFI,
906+
F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI,
906907
InstrumentationType == PGOInstrumentationType::CSFDO,
907908
shouldInstrumentEntryBB(), PGOBlockCoverage);
908909

909-
auto Name = FuncInfo.FuncNameVar;
910-
auto CFGHash =
910+
auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
911+
auto *const CFGHash =
911912
ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
912913
// Make sure that pointer to global is passed in with zero addrspace
913914
// This is relevant during GPU profiling
@@ -929,7 +930,7 @@ void FunctionInstrumenter::instrument() {
929930
unsigned NumCounters =
930931
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
931932

932-
if (InstrumentationType == PGOInstrumentationType::CTXPROF) {
933+
if (IsCtxProf) {
933934
auto *CSIntrinsic =
934935
Intrinsic::getDeclaration(&M, Intrinsic::instrprof_callsite);
935936
// We want to count the instrumentable callsites, then instrument them. This
@@ -995,7 +996,7 @@ void FunctionInstrumenter::instrument() {
995996
}
996997

997998
// Now instrument select instructions:
998-
FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
999+
FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, Name,
9991000
FuncInfo.FunctionHash);
10001001
assert(I == NumCounters);
10011002

llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll

Lines changed: 81 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,14 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
2-
; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
2+
; RUN: opt -passes=ctx-instr-gen -profile-context-root=an_entrypoint \
33
; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s
4-
; RUN: opt -passes=pgo-instr-gen,assign-guid,ctx-instr-lower -profile-context-root=an_entrypoint \
4+
; RUN: opt -passes=ctx-instr-gen,assign-guid,ctx-instr-lower -profile-context-root=an_entrypoint \
55
; RUN: -profile-context-root=another_entrypoint_no_callees \
66
; RUN: -S < %s | FileCheck --check-prefix=LOWERING %s
77

88

99
declare void @bar()
1010

1111
;.
12-
; INSTRUMENT: @__llvm_profile_raw_version = hidden constant i64 72057594037927946, comdat
13-
; INSTRUMENT: @__profn_foo = private constant [3 x i8] c"foo"
14-
; INSTRUMENT: @__profn_an_entrypoint = private constant [13 x i8] c"an_entrypoint"
15-
; INSTRUMENT: @__profn_another_entrypoint_no_callees = private constant [29 x i8] c"another_entrypoint_no_callees"
16-
; INSTRUMENT: @__profn_simple = private constant [6 x i8] c"simple"
17-
; INSTRUMENT: @__profn_no_callsites = private constant [12 x i8] c"no_callsites"
18-
; INSTRUMENT: @__profn_no_counters = private constant [11 x i8] c"no_counters"
19-
;.
20-
; LOWERING: @__llvm_profile_raw_version = hidden constant i64 72057594037927946, comdat
21-
; LOWERING: @__profn_foo = private constant [3 x i8] c"foo"
22-
; LOWERING: @__profn_an_entrypoint = private constant [13 x i8] c"an_entrypoint"
23-
; LOWERING: @__profn_another_entrypoint_no_callees = private constant [29 x i8] c"another_entrypoint_no_callees"
24-
; LOWERING: @__profn_simple = private constant [6 x i8] c"simple"
25-
; LOWERING: @__profn_no_callsites = private constant [12 x i8] c"no_callsites"
26-
; LOWERING: @__profn_no_counters = private constant [11 x i8] c"no_counters"
2712
; LOWERING: @an_entrypoint_ctx_root = global { ptr, ptr, ptr, i8 } zeroinitializer
2813
; LOWERING: @another_entrypoint_no_callees_ctx_root = global { ptr, ptr, ptr, i8 } zeroinitializer
2914
; LOWERING: @__llvm_ctx_profile_callsite = external hidden thread_local global ptr
@@ -32,33 +17,48 @@ declare void @bar()
3217
define void @foo(i32 %a, ptr %fct) {
3318
; INSTRUMENT-LABEL: define void @foo(
3419
; INSTRUMENT-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) {
20+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 728453322856651412, i32 2, i32 0)
3521
; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
3622
; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
3723
; INSTRUMENT: yes:
38-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
39-
; INSTRUMENT-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
40-
; INSTRUMENT-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
24+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 728453322856651412, i32 2, i32 1)
25+
; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
4126
; INSTRUMENT-NEXT: call void [[FCT]](i32 [[A]])
4227
; INSTRUMENT-NEXT: br label [[EXIT:%.*]]
4328
; INSTRUMENT: no:
44-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
29+
; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
4530
; INSTRUMENT-NEXT: call void @bar()
4631
; INSTRUMENT-NEXT: br label [[EXIT]]
4732
; INSTRUMENT: exit:
4833
; INSTRUMENT-NEXT: ret void
4934
;
5035
; LOWERING-LABEL: define void @foo(
5136
; LOWERING-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) !guid [[META0:![0-9]+]] {
37+
; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @foo, i64 6699318081062747564, i32 2, i32 2)
38+
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
39+
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1
40+
; LOWERING-NEXT: [[TMP4:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_expected_callee)
41+
; LOWERING-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i64 [[TMP3]]
42+
; LOWERING-NEXT: [[TMP6:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_callsite)
43+
; LOWERING-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP3]]
44+
; LOWERING-NEXT: [[TMP8:%.*]] = and i64 [[TMP2]], -2
45+
; LOWERING-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
5246
; LOWERING-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
5347
; LOWERING-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
5448
; LOWERING: yes:
55-
; LOWERING-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
56-
; LOWERING-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
57-
; LOWERING-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
49+
; LOWERING-NEXT: [[TMP10:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [2 x ptr] }, ptr [[TMP9]], i32 0, i32 1, i32 1
50+
; LOWERING-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 4
51+
; LOWERING-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 1
52+
; LOWERING-NEXT: store i64 [[TMP12]], ptr [[TMP10]], align 4
53+
; LOWERING-NEXT: store volatile ptr [[FCT]], ptr [[TMP5]], align 8
54+
; LOWERING-NEXT: [[TMP13:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [2 x ptr] }, ptr [[TMP1]], i32 0, i32 2, i32 0
55+
; LOWERING-NEXT: store volatile ptr [[TMP13]], ptr [[TMP7]], align 8
5856
; LOWERING-NEXT: call void [[FCT]](i32 [[A]])
5957
; LOWERING-NEXT: br label [[EXIT:%.*]]
6058
; LOWERING: no:
61-
; LOWERING-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
59+
; LOWERING-NEXT: store volatile ptr @bar, ptr [[TMP5]], align 8
60+
; LOWERING-NEXT: [[TMP14:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [2 x ptr] }, ptr [[TMP1]], i32 0, i32 2, i32 1
61+
; LOWERING-NEXT: store volatile ptr [[TMP14]], ptr [[TMP7]], align 8
6262
; LOWERING-NEXT: call void @bar()
6363
; LOWERING-NEXT: br label [[EXIT]]
6464
; LOWERING: exit:
@@ -79,26 +79,43 @@ exit:
7979
define void @an_entrypoint(i32 %a) {
8080
; INSTRUMENT-LABEL: define void @an_entrypoint(
8181
; INSTRUMENT-SAME: i32 [[A:%.*]]) {
82+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 784007058953177093, i32 2, i32 0)
8283
; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
8384
; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
8485
; INSTRUMENT: yes:
85-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_an_entrypoint, i64 784007058953177093, i32 2, i32 1)
86+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 784007058953177093, i32 2, i32 1)
87+
; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @an_entrypoint, i64 784007058953177093, i32 1, i32 0, ptr @foo)
8688
; INSTRUMENT-NEXT: call void @foo(i32 1, ptr null)
8789
; INSTRUMENT-NEXT: ret void
8890
; INSTRUMENT: no:
89-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_an_entrypoint, i64 784007058953177093, i32 2, i32 0)
9091
; INSTRUMENT-NEXT: ret void
9192
;
9293
; LOWERING-LABEL: define void @an_entrypoint(
9394
; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META1:![0-9]+]] {
95+
; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @an_entrypoint_ctx_root, i64 4909520559318251808, i32 2, i32 1)
96+
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
97+
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1
98+
; LOWERING-NEXT: [[TMP4:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_expected_callee)
99+
; LOWERING-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i64 [[TMP3]]
100+
; LOWERING-NEXT: [[TMP6:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_callsite)
101+
; LOWERING-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP3]]
102+
; LOWERING-NEXT: [[TMP8:%.*]] = and i64 [[TMP2]], -2
103+
; LOWERING-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
94104
; LOWERING-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
95105
; LOWERING-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
96106
; LOWERING: yes:
97-
; LOWERING-NEXT: call void @llvm.instrprof.increment(ptr @__profn_an_entrypoint, i64 784007058953177093, i32 2, i32 1)
107+
; LOWERING-NEXT: [[TMP10:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [1 x ptr] }, ptr [[TMP9]], i32 0, i32 1, i32 1
108+
; LOWERING-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 4
109+
; LOWERING-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 1
110+
; LOWERING-NEXT: store i64 [[TMP12]], ptr [[TMP10]], align 4
111+
; LOWERING-NEXT: store volatile ptr @foo, ptr [[TMP5]], align 8
112+
; LOWERING-NEXT: [[TMP13:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [1 x ptr] }, ptr [[TMP1]], i32 0, i32 2, i32 0
113+
; LOWERING-NEXT: store volatile ptr [[TMP13]], ptr [[TMP7]], align 8
98114
; LOWERING-NEXT: call void @foo(i32 1, ptr null)
115+
; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @an_entrypoint_ctx_root)
99116
; LOWERING-NEXT: ret void
100117
; LOWERING: no:
101-
; LOWERING-NEXT: call void @llvm.instrprof.increment(ptr @__profn_an_entrypoint, i64 784007058953177093, i32 2, i32 0)
118+
; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @an_entrypoint_ctx_root)
102119
; LOWERING-NEXT: ret void
103120
;
104121
%t = icmp eq i32 %a, 0
@@ -114,24 +131,32 @@ no:
114131
define void @another_entrypoint_no_callees(i32 %a) {
115132
; INSTRUMENT-LABEL: define void @another_entrypoint_no_callees(
116133
; INSTRUMENT-SAME: i32 [[A:%.*]]) {
134+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @another_entrypoint_no_callees, i64 784007058953177093, i32 2, i32 0)
117135
; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
118136
; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
119137
; INSTRUMENT: yes:
120-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_another_entrypoint_no_callees, i64 784007058953177093, i32 2, i32 1)
138+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @another_entrypoint_no_callees, i64 784007058953177093, i32 2, i32 1)
121139
; INSTRUMENT-NEXT: ret void
122140
; INSTRUMENT: no:
123-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_another_entrypoint_no_callees, i64 784007058953177093, i32 2, i32 0)
124141
; INSTRUMENT-NEXT: ret void
125142
;
126143
; LOWERING-LABEL: define void @another_entrypoint_no_callees(
127144
; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META2:![0-9]+]] {
145+
; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @another_entrypoint_no_callees_ctx_root, i64 -6371873725078000974, i32 2, i32 0)
146+
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
147+
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -2
148+
; LOWERING-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
128149
; LOWERING-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
129150
; LOWERING-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
130151
; LOWERING: yes:
131-
; LOWERING-NEXT: call void @llvm.instrprof.increment(ptr @__profn_another_entrypoint_no_callees, i64 784007058953177093, i32 2, i32 1)
152+
; LOWERING-NEXT: [[TMP5:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [0 x ptr] }, ptr [[TMP4]], i32 0, i32 1, i32 1
153+
; LOWERING-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
154+
; LOWERING-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 1
155+
; LOWERING-NEXT: store i64 [[TMP7]], ptr [[TMP5]], align 4
156+
; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @another_entrypoint_no_callees_ctx_root)
132157
; LOWERING-NEXT: ret void
133158
; LOWERING: no:
134-
; LOWERING-NEXT: call void @llvm.instrprof.increment(ptr @__profn_another_entrypoint_no_callees, i64 784007058953177093, i32 2, i32 0)
159+
; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @another_entrypoint_no_callees_ctx_root)
135160
; LOWERING-NEXT: ret void
136161
;
137162
%t = icmp eq i32 %a, 0
@@ -146,7 +171,7 @@ no:
146171
define void @simple(i32 %a) {
147172
; INSTRUMENT-LABEL: define void @simple(
148173
; INSTRUMENT-SAME: i32 [[A:%.*]]) {
149-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_simple, i64 742261418966908927, i32 1, i32 0)
174+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @simple, i64 742261418966908927, i32 1, i32 0)
150175
; INSTRUMENT-NEXT: ret void
151176
;
152177
; LOWERING-LABEL: define void @simple(
@@ -164,24 +189,30 @@ define void @simple(i32 %a) {
164189
define i32 @no_callsites(i32 %a) {
165190
; INSTRUMENT-LABEL: define i32 @no_callsites(
166191
; INSTRUMENT-SAME: i32 [[A:%.*]]) {
192+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @no_callsites, i64 784007058953177093, i32 2, i32 0)
167193
; INSTRUMENT-NEXT: [[C:%.*]] = icmp eq i32 [[A]], 0
168194
; INSTRUMENT-NEXT: br i1 [[C]], label [[YES:%.*]], label [[NO:%.*]]
169195
; INSTRUMENT: yes:
170-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_no_callsites, i64 784007058953177093, i32 2, i32 1)
196+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @no_callsites, i64 784007058953177093, i32 2, i32 1)
171197
; INSTRUMENT-NEXT: ret i32 1
172198
; INSTRUMENT: no:
173-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_no_callsites, i64 784007058953177093, i32 2, i32 0)
174199
; INSTRUMENT-NEXT: ret i32 0
175200
;
176201
; LOWERING-LABEL: define i32 @no_callsites(
177202
; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META4:![0-9]+]] {
203+
; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_callsites, i64 5679753335911435902, i32 2, i32 0)
204+
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
205+
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -2
206+
; LOWERING-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
178207
; LOWERING-NEXT: [[C:%.*]] = icmp eq i32 [[A]], 0
179208
; LOWERING-NEXT: br i1 [[C]], label [[YES:%.*]], label [[NO:%.*]]
180209
; LOWERING: yes:
181-
; LOWERING-NEXT: call void @llvm.instrprof.increment(ptr @__profn_no_callsites, i64 784007058953177093, i32 2, i32 1)
210+
; LOWERING-NEXT: [[TMP5:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [0 x ptr] }, ptr [[TMP4]], i32 0, i32 1, i32 1
211+
; LOWERING-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
212+
; LOWERING-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 1
213+
; LOWERING-NEXT: store i64 [[TMP7]], ptr [[TMP5]], align 4
182214
; LOWERING-NEXT: ret i32 1
183215
; LOWERING: no:
184-
; LOWERING-NEXT: call void @llvm.instrprof.increment(ptr @__profn_no_callsites, i64 784007058953177093, i32 2, i32 0)
185216
; LOWERING-NEXT: ret i32 0
186217
;
187218
%c = icmp eq i32 %a, 0
@@ -194,16 +225,25 @@ no:
194225

195226
define void @no_counters() {
196227
; INSTRUMENT-LABEL: define void @no_counters() {
197-
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_no_counters, i64 742261418966908927, i32 1, i32 0)
228+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @no_counters, i64 742261418966908927, i32 1, i32 0)
229+
; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @no_counters, i64 742261418966908927, i32 1, i32 0, ptr @bar)
198230
; INSTRUMENT-NEXT: call void @bar()
199231
; INSTRUMENT-NEXT: ret void
200232
;
201233
; LOWERING-LABEL: define void @no_counters(
202234
; LOWERING-SAME: ) !guid [[META5:![0-9]+]] {
203-
; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_counters, i64 5458232184388660970, i32 1, i32 0)
235+
; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_counters, i64 5458232184388660970, i32 1, i32 1)
204236
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
237+
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1
238+
; LOWERING-NEXT: [[TMP4:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_expected_callee)
239+
; LOWERING-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i64 [[TMP3]]
240+
; LOWERING-NEXT: [[TMP6:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_callsite)
241+
; LOWERING-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP3]]
205242
; LOWERING-NEXT: [[TMP8:%.*]] = and i64 [[TMP2]], -2
206243
; LOWERING-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
244+
; LOWERING-NEXT: store volatile ptr @bar, ptr [[TMP5]], align 8
245+
; LOWERING-NEXT: [[TMP10:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [1 x i64], [1 x ptr] }, ptr [[TMP1]], i32 0, i32 2, i32 0
246+
; LOWERING-NEXT: store volatile ptr [[TMP10]], ptr [[TMP7]], align 8
207247
; LOWERING-NEXT: call void @bar()
208248
; LOWERING-NEXT: ret void
209249
;
@@ -212,6 +252,7 @@ define void @no_counters() {
212252
}
213253
;.
214254
; LOWERING: attributes #[[ATTR0:[0-9]+]] = { nounwind }
255+
; LOWERING: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
215256
;.
216257
; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
217258
;.

0 commit comments

Comments
 (0)