Skip to content

Commit 144ddca

Browse files
[MemProf] Avoid duplicate edges between nodes (#113337)
The recent change to add support for cloning indirect calls inadvertantly caused duplicate edges to be created between the same caller/callee pair. This is due to the new moveCalleeEdgeToNewCaller not properly guarding the addition of a new edge (ironically I was testing for that in an assertion, but failed to handle that case specially otherwise). Now simply move the context ids over to any existing edge. This issue in turn led to some assumptions in cloning being violated, resulting in a later crash. Add a test for this case to checkNode.
1 parent 843c2fb commit 144ddca

File tree

2 files changed

+48
-7
lines changed

2 files changed

+48
-7
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,17 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
13521352
}
13531353
assert(NodeContextIds == CalleeEdgeContextIds);
13541354
}
1355+
// FIXME: Since this checking is only invoked under an option, we should
1356+
// change the error checking from using assert to something that will trigger
1357+
// an error on a release build.
1358+
#ifndef NDEBUG
1359+
// Make sure we don't end up with duplicate edges between the same caller and
1360+
// callee.
1361+
DenseSet<ContextNode<DerivedCCG, FuncTy, CallTy> *> NodeSet;
1362+
for (const auto &E : Node->CalleeEdges)
1363+
NodeSet.insert(E->Callee);
1364+
assert(NodeSet.size() == Node->CalleeEdges.size());
1365+
#endif
13551366
}
13561367

13571368
template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -3125,7 +3136,15 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31253136
// from the same callers as the old node. That should be true in the current
31263137
// use case, where we will remove None-type edges after copying over all
31273138
// caller edges from the callee.
3128-
assert(IsNewNode || NewCaller->findEdgeFromCaller(OldCallerEdge->Caller));
3139+
auto *ExistingCallerEdge =
3140+
NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
3141+
assert(IsNewNode || ExistingCallerEdge);
3142+
if (ExistingCallerEdge) {
3143+
ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
3144+
EdgeContextIdsToMove.end());
3145+
ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3146+
continue;
3147+
}
31293148
auto NewEdge = std::make_shared<ContextEdge>(
31303149
NewCaller, OldCallerEdge->Caller,
31313150
computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);

llvm/test/ThinLTO/X86/memprof-icp.ll

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,13 @@
186186
; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1
187187
; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold
188188
; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold
189+
; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold
190+
; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold
189191
; REMARKS-MAIN: created clone _ZN1B3barEj.memprof.1
190192
; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold
191193
; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold
194+
; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold
195+
; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold
192196
; REMARKS-FOO: created clone _Z3fooR2B0j.memprof.1
193197
;; In each version of foo we should have promoted the indirect call to two conditional
194198
;; direct calls, one to B::bar and one to B0::bar. The cloned version of foo should call
@@ -208,10 +212,10 @@
208212
; REMARKS-FOO: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold
209213
; REMARKS-FOO: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold
210214

211-
; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis
212-
; STATS-BE: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
213-
; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis
214-
; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
215+
; STATS: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis
216+
; STATS-BE: 8 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
217+
; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis
218+
; STATS-BE: 8 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
215219
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
216220
; STATS-BE: 5 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
217221

@@ -247,8 +251,8 @@
247251
; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
248252
; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
249253

250-
; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
251-
; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
254+
; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
255+
; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
252256
; STATS-BE-DISTRIB: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
253257

254258
;--- foo.ll
@@ -298,6 +302,9 @@ declare i32 @_Z3fooR2B0j(ptr, i32)
298302
define i32 @_ZN2B03barEj(ptr %this, i32 %s) {
299303
entry:
300304
%call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !33, !callsite !38
305+
;; Second allocation in this function, to ensure that indirect edges to the
306+
;; same callee are partitioned correctly.
307+
%call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !45, !callsite !50
301308
store volatile i32 0, ptr %call, align 4
302309
ret i32 0
303310
}
@@ -311,6 +318,9 @@ declare void @_ZdlPvm()
311318
define i32 @_ZN1B3barEj(ptr %this, i32 %s) {
312319
entry:
313320
%call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !39, !callsite !44
321+
;; Second allocation in this function, to ensure that indirect edges to the
322+
;; same callee are partitioned correctly.
323+
%call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !51, !callsite !56
314324
store volatile i32 0, ptr %call, align 4
315325
ret i32 0
316326
}
@@ -367,3 +377,15 @@ attributes #0 = { builtin allocsize(0) }
367377
!42 = !{!43, !"cold"}
368378
!43 = !{i64 4457553070050523782, i64 -2101080423462424381, i64 -6490791336773930154}
369379
!44 = !{i64 4457553070050523782}
380+
!45 = !{!46, !48}
381+
!46 = !{!47, !"notcold"}
382+
!47 = !{i64 456, i64 -2101080423462424381, i64 5188446645037944434}
383+
!48 = !{!49, !"cold"}
384+
!49 = !{i64 456, i64 -2101080423462424381, i64 5583420417449503557}
385+
!50 = !{i64 456}
386+
!51 = !{!52, !54}
387+
!52 = !{!53, !"notcold"}
388+
!53 = !{i64 789, i64 -2101080423462424381, i64 132626519179914298}
389+
!54 = !{!55, !"cold"}
390+
!55 = !{i64 789, i64 -2101080423462424381, i64 -6490791336773930154}
391+
!56 = !{i64 789}

0 commit comments

Comments
 (0)